From bcbcbf7a01470fb5c66ab16a3229bb4fc9a1c670 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?= <liyinhgqw@gmail.com>
Date: Mon, 5 Mar 2018 10:16:24 +0800
Subject: [PATCH] Refactor tensor, accelerate gpu initialization time

---
 mace/core/allocator.h                         |  34 +-
 mace/core/buffer.h                            | 356 ++++++++++++++++++
 mace/core/mace.cc                             |   1 -
 mace/core/net.cc                              |   8 +-
 mace/core/net.h                               |   6 +-
 mace/core/preallocated_pooled_allocator.h     |  24 +-
 .../runtime/hexagon/hexagon_control_wrapper.h |   3 -
 mace/core/runtime/opencl/opencl_allocator.cc  |  24 +-
 mace/core/runtime/opencl/opencl_allocator.h   |  16 +-
 .../opencl_preallocated_pooled_allocator.cc   |  30 --
 .../opencl_preallocated_pooled_allocator.h    |  45 ---
 mace/core/serializer.cc                       |  82 ----
 mace/core/serializer.h                        |  29 --
 mace/core/tensor.h                            | 292 +++++---------
 mace/core/workspace.cc                        | 112 +++---
 mace/core/workspace.h                         |  23 +-
 mace/kernels/batch_norm.h                     |   2 +-
 mace/kernels/conv_2d.h                        |   2 +-
 mace/kernels/depthwise_conv2d.h               |   2 +-
 mace/kernels/fully_connected.h                |   2 +-
 mace/kernels/opencl/buffer_to_image.cc        |  11 +-
 mace/kernels/opencl/cl/buffer_to_image.cl     |  29 +-
 mace/kernels/opencl/conv_2d_opencl.cc         |   3 +-
 mace/kernels/opencl/resize_bilinear_opencl.cc |  13 +-
 24 files changed, 623 insertions(+), 526 deletions(-)
 create mode 100644 mace/core/buffer.h
 delete mode 100644 mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
 delete mode 100644 mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
 delete mode 100644 mace/core/serializer.cc
 delete mode 100644 mace/core/serializer.h
diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index f265ed24..7ab65f0b 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -26,17 +26,17 @@ class Allocator {
  public:
   Allocator() {}
   virtual ~Allocator() noexcept {}
-  virtual void *New(size_t nbytes) = 0;
+  virtual void *New(size_t nbytes) const = 0;
   virtual void *NewImage(const std::vector<size_t> &image_shape,
-                         const DataType dt) = 0;
-  virtual void Delete(void *data) = 0;
-  virtual void DeleteImage(void *data) = 0;
-  virtual void *Map(void *buffer, size_t nbytes) = 0;
+                         const DataType dt) const = 0;
+  virtual void Delete(void *data) const = 0;
+  virtual void DeleteImage(void *data) const = 0;
+  virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0;
   virtual void *MapImage(void *buffer,
                          const std::vector<size_t> &image_shape,
-                         std::vector<size_t> &mapped_image_pitch) = 0;
-  virtual void Unmap(void *buffer, void *mapper_ptr) = 0;
-  virtual bool OnHost() = 0;
+                         std::vector<size_t> *mapped_image_pitch) const = 0;
+  virtual void Unmap(void *buffer, void *mapper_ptr) const = 0;
+  virtual bool OnHost() const = 0;
 
   template <typename T>
   T *New(size_t num_elements) {
@@ -52,7 +52,7 @@ class Allocator {
 class CPUAllocator : public Allocator {
  public:
   ~CPUAllocator() override {}
-  void *New(size_t nbytes) override {
+  void *New(size_t nbytes) const override {
     VLOG(3) << "Allocate CPU buffer: " << nbytes;
     void *data = nullptr;
 #ifdef __ANDROID__
@@ -67,27 +67,29 @@ class CPUAllocator : public Allocator {
   }
 
   void *NewImage(const std::vector<size_t> &shape,
-                 const DataType dt) override {
+                 const DataType dt) const override {
     LOG(FATAL) << "Allocate CPU image";
     return nullptr;
   }
 
-  void Delete(void *data) override {
+  void Delete(void *data) const override {
     VLOG(3) << "Free CPU buffer";
     free(data);
   }
-  void DeleteImage(void *data) override {
+  void DeleteImage(void *data) const override {
     LOG(FATAL) << "Free CPU image";
     free(data);
   };
-  void *Map(void *buffer, size_t nbytes) override { return buffer; }
+  void *Map(void *buffer, size_t offset, size_t nbytes) const override {
+    return (char*)buffer + offset;
+  }
   void *MapImage(void *buffer,
                  const std::vector<size_t> &image_shape,
-                 std::vector<size_t> &mapped_image_pitch) override {
+                 std::vector<size_t> *mapped_image_pitch) const override {
     return buffer;
   }
-  void Unmap(void *buffer, void *mapper_ptr) override {}
-  bool OnHost() override { return true; }
+  void Unmap(void *buffer, void *mapper_ptr) const override {}
+  bool OnHost() const override { return true; }
 };
 
 std::map<int32_t, Allocator *> *gAllocatorRegistry();
diff --git a/mace/core/buffer.h b/mace/core/buffer.h
new file mode 100644
index 00000000..bada99c0
--- /dev/null
+++ b/mace/core/buffer.h
@@ -0,0 +1,356 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_BUFFER_H_
+#define MACE_CORE_BUFFER_H_
+
+#include "mace/core/types.h"
+#include "mace/core/allocator.h"
+#include <vector>
+
+namespace mace {
+
+class BufferBase {
+ public:
+  BufferBase() : size_(0) {}
+  BufferBase(index_t size) : size_(size) {}
+  virtual ~BufferBase() {}
+
+  virtual void *buffer() = 0;
+
+  virtual const void *raw_data() const = 0;
+
+  virtual void *raw_mutable_data() = 0;
+
+  virtual void *Map(index_t offset,
+                    index_t length,
+                    std::vector<size_t> *pitch) const = 0;
+
+  virtual void UnMap(void *mapped_ptr) const = 0;
+
+  virtual void Map(std::vector<size_t> *pitch) = 0;
+
+  virtual void UnMap() = 0;
+
+  virtual void Resize(index_t size) = 0;
+
+  virtual void Copy(void *src, index_t offset, index_t length) = 0;
+
+  virtual bool OnHost() const = 0;
+
+  virtual index_t offset() const {
+    return 0;
+  };
+
+  template<typename T>
+  const T *data() const {
+    return reinterpret_cast<const T *>(raw_data());
+  }
+
+  template<typename T>
+  T *mutable_data() {
+    return reinterpret_cast<T *>(raw_mutable_data());
+  }
+
+  index_t size() const {
+    return size_;
+  }
+
+ protected:
+  index_t size_;
+};
+
+class Buffer : public BufferBase {
+ public:
+  Buffer(Allocator *allocator)
+    : BufferBase(0),
+      allocator_(allocator),
+      buf_(nullptr),
+      mapped_buf_(nullptr),
+      is_data_owner_(true) {}
+
+  Buffer(Allocator *allocator, index_t size)
+    : BufferBase(size),
+      allocator_(allocator),
+      mapped_buf_(nullptr),
+      is_data_owner_(true) {
+    buf_ = allocator->New(size);
+  }
+
+  Buffer(Allocator *allocator, void *data, index_t size)
+    : BufferBase(size),
+      allocator_(allocator),
+      buf_(data),
+      mapped_buf_(nullptr),
+      is_data_owner_(false) {}
+
+  virtual ~Buffer() {
+    if (mapped_buf_ != nullptr) {
+      UnMap();
+    }
+    if (is_data_owner_ && buf_ != nullptr) {
+      allocator_->Delete(buf_);
+    }
+  }
+
+  void *buffer() {
+    MACE_CHECK_NOTNULL(buf_);
+    return buf_;
+  };
+
+  const void *raw_data() const {
+    if (OnHost()) {
+      MACE_CHECK_NOTNULL(buf_);
+      return buf_;
+    } else {
+      MACE_CHECK_NOTNULL(mapped_buf_);
+      return mapped_buf_;
+    }
+  }
+
+  void *raw_mutable_data() {
+    if (OnHost()) {
+      MACE_CHECK_NOTNULL(buf_);
+      return buf_;
+    } else {
+      MACE_CHECK_NOTNULL(mapped_buf_);
+      return mapped_buf_;
+    }
+  }
+
+  void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
+    MACE_CHECK_NOTNULL(buf_);
+    return allocator_->Map(buf_, offset, length);
+  }
+
+  void UnMap(void *mapped_ptr) const {
+    MACE_CHECK_NOTNULL(buf_);
+    MACE_CHECK_NOTNULL(mapped_ptr);
+    allocator_->Unmap(buf_, mapped_ptr);
+  }
+
+  void Map(std::vector<size_t> *pitch) {
+    MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
+    mapped_buf_ = Map(0, size_, pitch);
+  };
+
+  void UnMap() {
+    UnMap(mapped_buf_);
+    mapped_buf_ = nullptr;
+  }
+
+  void Resize(index_t size) {
+    MACE_CHECK(is_data_owner_,
+               "data is not owned by this buffer, cannot resize");
+    if (size != size_) {
+      if (buf_ != nullptr) {
+        allocator_->Delete(buf_);
+      }
+      size_ = size;
+      buf_ = allocator_->New(size);
+    }
+  }
+
+  void Copy(void *src, index_t offset, index_t length) {
+    MACE_CHECK_NOTNULL(mapped_buf_);
+    MACE_CHECK(length <= size_, "out of buffer");
+    memcpy(mapped_buf_, (char *) src + offset, length);
+  }
+
+  bool OnHost() const {
+    return allocator_->OnHost();
+  }
+
+ private:
+  Allocator *allocator_;
+  void *buf_;
+  void *mapped_buf_;
+  bool is_data_owner_;
+
+ DISABLE_COPY_AND_ASSIGN(Buffer);
+};
+
+class Image : public BufferBase {
+ public:
+  Image()
+    : BufferBase(0),
+      allocator_(GetDeviceAllocator(OPENCL)),
+      buf_(nullptr),
+      mapped_buf_(nullptr) {}
+
+  Image(std::vector<size_t> shape, DataType data_type)
+    : BufferBase(std::accumulate(shape.begin(), shape.end(),
+                                 1, std::multiplies<index_t>())
+                   * GetEnumTypeSize(data_type)),
+      allocator_(GetDeviceAllocator(OPENCL)),
+      mapped_buf_(nullptr) {
+    shape_ = shape;
+    data_type_ = data_type;
+    buf_ = allocator_->NewImage(shape, data_type);
+  }
+
+  virtual ~Image() {
+    if (mapped_buf_ != nullptr) {
+      UnMap();
+    }
+    if (buf_ != nullptr) {
+      allocator_->DeleteImage(buf_);
+    }
+  }
+
+  void *buffer() {
+    MACE_CHECK_NOTNULL(buf_);
+    return buf_;
+  };
+
+  const void *raw_data() const {
+    MACE_CHECK_NOTNULL(mapped_buf_);
+    return mapped_buf_;
+  }
+
+  void *raw_mutable_data() {
+    MACE_CHECK_NOTNULL(mapped_buf_);
+    return mapped_buf_;
+  }
+
+  std::vector<size_t> image_shape() const {
+    return shape_;
+  }
+
+  void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
+    MACE_NOT_IMPLEMENTED;
+    return nullptr;
+  }
+
+  void UnMap(void *mapped_ptr) const {
+    MACE_CHECK_NOTNULL(buf_);
+    MACE_CHECK_NOTNULL(mapped_ptr);
+    allocator_->Unmap(buf_, mapped_ptr);
+  }
+
+  void Map(std::vector<size_t> *pitch) {
+    MACE_CHECK_NOTNULL(buf_);
+    MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
+    MACE_CHECK_NOTNULL(pitch);
+    mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch);
+  };
+
+  void UnMap() {
+    UnMap(mapped_buf_);
+    mapped_buf_ = nullptr;
+  };
+
+  void Resize(index_t size) {}
+
+  void Copy(void *src, index_t offset, index_t length) {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  bool OnHost() const {
+    return allocator_->OnHost();
+  }
+
+ private:
+  Allocator *allocator_;
+  std::vector<size_t> shape_;
+  DataType data_type_;
+  void *buf_;
+  void *mapped_buf_;
+
+ DISABLE_COPY_AND_ASSIGN(Image);
+};
+
+class BufferSlice : public BufferBase {
+ public:
+  BufferSlice() {}
+  BufferSlice(BufferBase *buffer, index_t offset, index_t length)
+    : BufferBase(buffer->size()),
+      buffer_(buffer),
+      mapped_buf_(nullptr),
+      offset_(offset),
+      length_(length) {
+    MACE_CHECK(offset >= 0, "buffer slice offset should >= 0");
+    MACE_CHECK(offset + length <= size_,
+               "buffer slice offset + length (",
+               offset,
+               " + ",
+               length,
+               ") should <= ",
+               size_);
+  }
+  BufferSlice(const BufferSlice &other) : BufferSlice(other.buffer_,
+                                                      other.offset_,
+                                                      other.length_) {}
+
+  ~BufferSlice() {
+    if (mapped_buf_ != nullptr) {
+      UnMap();
+    }
+  }
+
+  void *buffer() {
+    return buffer_->buffer();
+  };
+
+  const void *raw_data() const {
+    if (OnHost()) {
+      MACE_CHECK_NOTNULL(buffer_);
+      return (char *) buffer_->raw_data() + offset_;
+    } else {
+      MACE_CHECK_NOTNULL(mapped_buf_);
+      return mapped_buf_;
+    }
+  }
+
+  void *raw_mutable_data() {
+    MACE_NOT_IMPLEMENTED;
+    return nullptr;
+  }
+
+  void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
+    MACE_NOT_IMPLEMENTED;
+    return nullptr;
+  }
+
+  void UnMap(void *mapped_ptr) const {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  void Map(std::vector<size_t> *pitch) {
+    MACE_CHECK_NOTNULL(buffer_);
+    MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null");
+    mapped_buf_ = buffer_->Map(offset_, length_, pitch);
+  };
+
+  void UnMap() {
+    MACE_CHECK_NOTNULL(mapped_buf_);
+    buffer_->UnMap(mapped_buf_);
+    mapped_buf_ = nullptr;
+  };
+
+  void Resize(index_t size) {
+  }
+
+  void Copy(void *src, index_t offset, index_t length) {
+    MACE_NOT_IMPLEMENTED;
+  }
+
+  index_t offset() const {
+    return offset_;
+  }
+
+  bool OnHost() const {
+    return buffer_->OnHost();
+  }
+
+ private:
+  BufferBase *buffer_;
+  void *mapped_buf_;
+  index_t offset_;
+  index_t length_;
+};
+
+}
+
+#endif // MACE_CORE_BUFFER_H_
diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index 81e26246..58591a78 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -542,7 +542,6 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
     if (!net->Run()) {
       LOG(FATAL) << "Net init run failed";
     }
-    ws_->RemoveUnsedTensor();
     net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
   }
 }
diff --git a/mace/core/net.cc b/mace/core/net.cc
index 46febc68..ce44b951 100644
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -15,14 +15,14 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
                  DeviceType type)
     : op_registry_(op_registry), name_(net_def->name()) {}
 
-SimpleNet::SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
+SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
                      const std::shared_ptr<const NetDef> net_def,
                      Workspace *ws,
                      DeviceType type,
                      const NetMode mode)
     :  NetBase(op_registry, net_def, ws, type),
       device_type_(type) {
-  MACE_LATENCY_LOGGER(1, "Constructing SimpleNet ", net_def->name());
+  MACE_LATENCY_LOGGER(1, "Constructing SerialNet ", net_def->name());
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
     const auto &operator_def = net_def->op(idx);
     VLOG(3) << "Creating operator " << operator_def.name() << "("
@@ -36,7 +36,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
   }
 }
 
-bool SimpleNet::Run(RunMetadata *run_metadata) {
+bool SerialNet::Run(RunMetadata *run_metadata) {
   MACE_MEMORY_LOGGING_GUARD();
   MACE_LATENCY_LOGGER(1, "Running net");
   for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) {
@@ -99,7 +99,7 @@ std::unique_ptr<NetBase> CreateNet(
     Workspace *ws,
     DeviceType type,
     const NetMode mode) {
-  std::unique_ptr<NetBase> net(new SimpleNet(op_registry, net_def, ws, type, mode));
+  std::unique_ptr<NetBase> net(new SerialNet(op_registry, net_def, ws, type, mode));
   return net;
 }
 
diff --git a/mace/core/net.h b/mace/core/net.h
index 1f57794f..3b625393 100644
--- a/mace/core/net.h
+++ b/mace/core/net.h
@@ -33,9 +33,9 @@ class NetBase {
   DISABLE_COPY_AND_ASSIGN(NetBase);
 };
 
-class SimpleNet : public NetBase {
+class SerialNet : public NetBase {
  public:
-  SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
+  SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
             const std::shared_ptr<const NetDef> net_def,
             Workspace *ws,
             DeviceType type,
@@ -47,7 +47,7 @@ class SimpleNet : public NetBase {
   std::vector<std::unique_ptr<OperatorBase> > operators_;
   DeviceType device_type_;
 
-  DISABLE_COPY_AND_ASSIGN(SimpleNet);
+  DISABLE_COPY_AND_ASSIGN(SerialNet);
 };
 
 std::unique_ptr<NetBase> CreateNet(
diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h
index dcb35070..75cf4117 100644
--- a/mace/core/preallocated_pooled_allocator.h
+++ b/mace/core/preallocated_pooled_allocator.h
@@ -5,6 +5,7 @@
 #ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
 #define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
 
+#include <unordered_map>
 #include "mace/core/allocator.h"
 
 namespace mace {
@@ -13,17 +14,26 @@ class PreallocatedPooledAllocator {
  public:
   PreallocatedPooledAllocator() {}
 
-  virtual ~PreallocatedPooledAllocator() noexcept {}
+  ~PreallocatedPooledAllocator() noexcept {}
 
-  virtual void PreallocateImage(int mem_id,
-                                const std::vector<size_t> &image_shape,
-                                DataType data_type) = 0;
+  void SetBuffer(int mem_id, std::unique_ptr<BufferBase> &&buffer) {
+    buffers_[mem_id] = std::move(buffer);
+  }
 
-  virtual void *GetImage(int mem_id) = 0;
+  BufferBase *GetBuffer(int mem_id) {
+    if (buffers_.find(mem_id) != buffers_.end()) {
+      return buffers_[mem_id].get();
+    } else {
+      return nullptr;
+    }
+  }
 
-  virtual bool HasImage(int mem_id) = 0;
+  virtual bool HasBuffer(int mem_id) {
+    return buffers_.find(mem_id) != buffers_.end();
+  }
 
-  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
+ private:
+  std::unordered_map<int, std::unique_ptr<BufferBase>> buffers_;
 };
 
 } // namespace mace
diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
index f24f9522..09a1c778 100644
--- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h
+++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h
@@ -11,7 +11,6 @@
 #include "mace/core/runtime/hexagon/quantize.h"
 #include "mace/core/tensor.h"
 #include "mace/public/mace.h"
-#include "mace/core/serializer.h"
 
 namespace mace {
 
@@ -37,7 +36,6 @@ class HexagonControlWrapper {
   void SetGraphMode(int mode);
 
  private:
-  // CAVEAT: Need offset as HVX library reserves some ids
   static constexpr int NODE_ID_OFFSET = 10000;
 
   inline uint32_t node_id(uint32_t nodeid) {
@@ -45,7 +43,6 @@ class HexagonControlWrapper {
   }
 
   int nn_id_;
-  Serializer serializer_;
   Quantizer quantizer_;
 
   std::vector<std::vector<index_t>> input_shapes_;
diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc
index 929b4818..abc88bdd 100644
--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -35,7 +35,7 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
 OpenCLAllocator::OpenCLAllocator() {}
 
 OpenCLAllocator::~OpenCLAllocator() {}
-void *OpenCLAllocator::New(size_t nbytes) {
+void *OpenCLAllocator::New(size_t nbytes) const {
   VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
   cl_int error;
   cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
@@ -47,7 +47,7 @@ void *OpenCLAllocator::New(size_t nbytes) {
 }
 
 void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
-                                const DataType dt) {
+                                const DataType dt) const {
   MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2";
   VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1];
 
@@ -67,7 +67,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
   return cl_image;
 }
 
-void OpenCLAllocator::Delete(void *buffer) {
+void OpenCLAllocator::Delete(void *buffer) const {
   VLOG(3) << "Free OpenCL buffer";
   if (buffer != nullptr) {
     cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
@@ -75,7 +75,7 @@ void OpenCLAllocator::Delete(void *buffer) {
   }
 }
 
-void OpenCLAllocator::DeleteImage(void *buffer) {
+void OpenCLAllocator::DeleteImage(void *buffer) const {
   VLOG(3) << "Free OpenCL image";
   if (buffer != nullptr) {
     cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
@@ -83,13 +83,13 @@ void OpenCLAllocator::DeleteImage(void *buffer) {
   }
 }
 
-void *OpenCLAllocator::Map(void *buffer, size_t nbytes) {
+void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
   auto cl_buffer = static_cast<cl::Buffer *>(buffer);
   auto queue = OpenCLRuntime::Global()->command_queue();
   // TODO(heliangliang) Non-blocking call
   cl_int error;
   void *mapped_ptr =
-      queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
+      queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset,
                              nbytes, nullptr, nullptr, &error);
   MACE_CHECK(error == CL_SUCCESS);
   return mapped_ptr;
@@ -98,33 +98,33 @@ void *OpenCLAllocator::Map(void *buffer, size_t nbytes) {
 // TODO : there is something wrong with half type.
 void *OpenCLAllocator::MapImage(void *buffer,
                                 const std::vector<size_t> &image_shape,
-                                std::vector<size_t> &mapped_image_pitch) {
+                                std::vector<size_t> *mapped_image_pitch) const {
   MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image";
   auto cl_image = static_cast<cl::Image2D *>(buffer);
   std::array<size_t, 3> origin = {0, 0, 0};
   std::array<size_t, 3> region = {image_shape[0], image_shape[1], 1};
 
-  mapped_image_pitch.resize(2);
+  mapped_image_pitch->resize(2);
   cl_int error;
   void *mapped_ptr =
       OpenCLRuntime::Global()->command_queue().enqueueMapImage(*cl_image,
                                                             CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                                                             origin, region,
-                                                            &mapped_image_pitch[0],
-                                                            &mapped_image_pitch[1],
+                                                            mapped_image_pitch->data(),
+                                                            mapped_image_pitch->data() + 1,
                                                             nullptr, nullptr, &error);
   MACE_CHECK(error == CL_SUCCESS) << error;
 
   return mapped_ptr;
 }
 
-void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
+void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const {
   auto cl_buffer = static_cast<cl::Buffer *>(buffer);
   auto queue = OpenCLRuntime::Global()->command_queue();
   MACE_CHECK(queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, nullptr,
                                          nullptr) == CL_SUCCESS);
 }
 
-bool OpenCLAllocator::OnHost() { return false; }
+bool OpenCLAllocator::OnHost() const { return false; }
 
 }  // namespace mace
diff --git a/mace/core/runtime/opencl/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h
index 6aa8fbf9..7065b132 100644
--- a/mace/core/runtime/opencl/opencl_allocator.h
+++ b/mace/core/runtime/opencl/opencl_allocator.h
@@ -15,7 +15,7 @@ class OpenCLAllocator : public Allocator {
 
   ~OpenCLAllocator() override;
 
-  void *New(size_t nbytes) override;
+  void *New(size_t nbytes) const override;
 
   /*
    * Use Image2D with RGBA (128-bit) format to represent the image.
@@ -23,21 +23,21 @@ class OpenCLAllocator : public Allocator {
    * @ shape : [depth, ..., height, width ].
    */
   void *NewImage(const std::vector<size_t> &image_shape,
-                 const DataType dt) override;
+                 const DataType dt) const override;
 
-  void Delete(void *buffer) override;
+  void Delete(void *buffer) const override;
 
-  void DeleteImage(void *buffer) override;
+  void DeleteImage(void *buffer) const override;
 
-  void *Map(void *buffer, size_t nbytes) override;
+  void *Map(void *buffer, size_t offset, size_t nbytes) const override;
 
   void *MapImage(void *buffer,
                  const std::vector<size_t> &image_shape,
-                 std::vector<size_t> &mapped_image_pitch) override;
+                 std::vector<size_t> *mapped_image_pitch) const override;
 
-  void Unmap(void *buffer, void *mapped_ptr) override;
+  void Unmap(void *buffer, void *mapped_ptr) const override;
 
-  bool OnHost() override;
+  bool OnHost() const override;
 };
 
 }  // namespace mace
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
deleted file mode 100644
index f1ef73a9..00000000
--- a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-//
-// Copyright (c) 2018 XiaoMi All rights reserved.
-//
-
-#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
-
-namespace mace {
-
-OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
-  : allocator(GetDeviceAllocator(DeviceType::OPENCL)) {
-}
-
-OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() {
-}
-
-void OpenCLPreallocatedPooledAllocator::PreallocateImage(int mem_id,
-                                                         const std::vector<
-                                                           size_t> &image_shape,
-                                                         DataType data_type) {
-  MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
-  VLOG(2) << "Preallocate OpenCL image: " << mem_id << " "
-          << image_shape[0] << ", " << image_shape[1];
-  images_[mem_id] = std::move(std::unique_ptr<void, std::function<void(void *)>>(
-    allocator->NewImage(image_shape, data_type), [this](void *p) {
-      this->allocator->DeleteImage(p);
-    }));
-  image_shapes_[mem_id] = image_shape;
-}
-
-} // namespace mace
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
deleted file mode 100644
index 960ff174..00000000
--- a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//
-// Copyright (c) 2018 XiaoMi All rights reserved.
-//
-
-#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
-#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
-
-#include "mace/core/preallocated_pooled_allocator.h"
-#include <unordered_map>
-
-namespace mace {
-
-class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
- public:
-  OpenCLPreallocatedPooledAllocator();
-
-  ~OpenCLPreallocatedPooledAllocator() override;
-
-  void PreallocateImage(int mem_id,
-                        const std::vector<size_t> &image_shape,
-                        DataType data_type) override;
-
-  inline void *GetImage(int mem_id) override {
-    MACE_CHECK(HasImage(mem_id), "image does not exist");
-    return images_[mem_id].get();
-  }
-
-  inline bool HasImage(int mem_id) override {
-    return images_.find(mem_id) != images_.end();
-  }
-
-  inline std::vector<size_t> GetImageSize(int mem_id) override {
-    return image_shapes_[mem_id];
-  }
-
- private:
-  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
-    images_;
-  std::unordered_map<int, std::vector<size_t>> image_shapes_;
-  Allocator *allocator;
-};
-
-} // namepsace mace
-
-#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
diff --git a/mace/core/serializer.cc b/mace/core/serializer.cc
deleted file mode 100644
index c171205f..00000000
--- a/mace/core/serializer.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#include "mace/core/serializer.h"
-
-namespace mace {
-
-std::unique_ptr<ConstTensor> Serializer::Serialize(const Tensor &tensor,
-                                                   const std::string &name) {
-  MACE_NOT_IMPLEMENTED;
-  return nullptr;
-}
-
-std::unique_ptr<Tensor> Serializer::Deserialize(const ConstTensor &const_tensor,
-                                                DeviceType type) {
-  std::unique_ptr<Tensor> tensor(
-      new Tensor(GetDeviceAllocator(type), const_tensor.data_type()));
-  std::vector<index_t> dims;
-  for (const index_t d : const_tensor.dims()) {
-    dims.push_back(d);
-  }
-  tensor->Resize(dims);
-
-  switch (const_tensor.data_type()) {
-    case DT_HALF:
-      tensor->Copy<half>(reinterpret_cast<const half *>(const_tensor.data()),
-                         const_tensor.data_size());
-      break;
-    case DT_FLOAT:
-      tensor->Copy<float>(reinterpret_cast<const float *>(const_tensor.data()),
-                          const_tensor.data_size());
-      break;
-    case DT_DOUBLE:
-      tensor->Copy<double>(
-          reinterpret_cast<const double *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_INT32:
-      tensor->Copy<int32_t>(
-          reinterpret_cast<const int32_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_INT64:
-      tensor->Copy<int64_t>(
-          reinterpret_cast<const int64_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_UINT8:
-      tensor->Copy<uint8_t>(
-          reinterpret_cast<const uint8_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_INT16:
-      tensor->CopyWithCast<int32_t, uint16_t>(
-          reinterpret_cast<const int32_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_INT8:
-      tensor->CopyWithCast<int32_t, int8_t>(
-          reinterpret_cast<const int32_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_UINT16:
-      tensor->CopyWithCast<int32_t, int16_t>(
-          reinterpret_cast<const int32_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    case DT_BOOL:
-      tensor->CopyWithCast<int32_t, bool>(
-          reinterpret_cast<const int32_t *>(const_tensor.data()),
-          const_tensor.data_size());
-      break;
-    default:
-      MACE_NOT_IMPLEMENTED;
-      break;
-  }
-
-  return tensor;
-}
-
-}  // namespace mace
diff --git a/mace/core/serializer.h b/mace/core/serializer.h
deleted file mode 100644
index fcc98a72..00000000
--- a/mace/core/serializer.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#ifndef MACE_CORE_SERIALIZER_H_
-#define MACE_CORE_SERIALIZER_H_
-
-#include "mace/core/tensor.h"
-#include "mace/public/mace.h"
-
-namespace mace {
-
-class Serializer {
- public:
-  Serializer() {}
-  ~Serializer() {}
-
-  std::unique_ptr<ConstTensor> Serialize(const Tensor &tensor,
-                                         const std::string &name);
-
-  std::unique_ptr<Tensor> Deserialize(const ConstTensor &const_tensor,
-                                      DeviceType type);
-
-  DISABLE_COPY_AND_ASSIGN(Serializer);
-};
-
-}  // namespace mace
-
-#endif  // MACE_CORE_SERIALIZER_H_
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 0fcda89c..cfe832ed 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -5,7 +5,7 @@
 #ifndef MACE_CORE_TENSOR_H_
 #define MACE_CORE_TENSOR_H_
 
-#include "mace/core/allocator.h"
+#include "mace/core/buffer.h"
 #include "mace/utils/logging.h"
 #include "mace/core/types.h"
 #include "mace/public/mace.h"
@@ -46,7 +46,6 @@ namespace mace {
   CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
                      , LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;)
 
-
 namespace numerical_chars {
 inline std::ostream &operator<<(std::ostream &os, char c) {
   return std::is_signed<char>::value ? os << static_cast<int>(c)
@@ -64,27 +63,30 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
 
 class Tensor {
  public:
-  Tensor()
-      : alloc_(GetDeviceAllocator(DeviceType::CPU)),
-        size_(0),
-        dtype_(DT_FLOAT),
-        buffer_(nullptr),
-        data_(nullptr),
-        unused_(false),
-        is_image_(false){
-};
-
   Tensor(Allocator *alloc, DataType type)
-      : alloc_(alloc),
-        size_(0),
-        dtype_(type),
-        buffer_(nullptr),
-        data_(nullptr),
-        unused_(false),
-        is_image_(false){
-  };
+    : allocator_(alloc),
+      dtype_(type),
+      buffer_(nullptr),
+      is_buffer_owner_(true) {};
+
+  Tensor(BufferBase *buffer, DataType dtype)
+    : dtype_(dtype),
+      buffer_(buffer),
+      is_buffer_owner_(false) {}
+
+  Tensor(const BufferSlice &buffer_slice, DataType dtype)
+    : dtype_(dtype),
+      buffer_slice_(buffer_slice),
+      is_buffer_owner_(false) {
+    buffer_ = &buffer_slice_;
+  }
+
+  Tensor() : Tensor(GetDeviceAllocator(CPU), DT_FLOAT) {}
 
   ~Tensor() {
+    if (is_buffer_owner_ && buffer_ != nullptr) {
+      delete buffer_;
+    }
   }
 
   inline DataType dtype() const { return dtype_; }
@@ -93,182 +95,113 @@ class Tensor {
 
   inline const std::vector<index_t> &shape() const { return shape_; }
 
-  inline const std::vector<size_t> &image_shape() const { return image_shape_; }
-
-  inline const bool is_image() const { return is_image_; }
-
   inline index_t dim_size() const { return shape_.size(); }
 
   inline index_t dim(unsigned int index) const {
     MACE_CHECK(index < shape_.size(), "Dim out of range: ",
-                                      index, " >= ", shape_.size());
+               index, " >= ", shape_.size());
     return shape_[index];
   }
 
-  inline index_t size() const { return size_; }
-
-  inline index_t raw_size() const { return size_ * SizeOfType(); }
-
-  inline const bool unused() const { return unused_; }
-
-  inline int64_t NumElements() const {
+  inline index_t size() const {
     return std::accumulate(shape_.begin(), shape_.end(), 1,
                            std::multiplies<int64_t>());
   }
 
-  inline const bool OnHost() const { return alloc_->OnHost(); }
-
-  /*
-   * Map the device buffer as CPU buffer to access the data, unmap must be
-   * called later
-   */
-  inline void Map() const {
-    if (!OnHost()) {
-      MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
-      data_ = alloc_->Map(buffer_.get(), size_ * SizeOfType());
-    }
+  inline index_t raw_size() const {
+    return size() * SizeOfType();
   }
 
-  inline void MapImage(std::vector<size_t> &mapped_image_pitch) const {
-    MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
-    data_ = alloc_->MapImage(buffer_.get(), image_shape_, mapped_image_pitch);
+  inline void *buffer() const {
+    MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr,
+               "buffer is null");
+    return buffer_->buffer();
   }
 
-  /*
-   *  Unmap the device buffer
-   */
-  inline void Unmap() const {
-    if (!OnHost()) {
-      MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
-      alloc_->Unmap(buffer_.get(), data_);
-      data_ = nullptr;
-    }
+  inline index_t buffer_offset() const {
+    return buffer_->offset();
   }
 
-  void *buffer() const { return buffer_.get(); }
-
   inline const void *raw_data() const {
-    void *data = MappedBuffer();
-    MACE_CHECK(data != nullptr || size_ == 0,
-               "The tensor is of non-zero shape, but its data is not allocated "
-               "or mapped yet.");
-    return data;
+    MACE_CHECK(buffer_ != nullptr, "buffer is null");
+    return buffer_->raw_data();
   }
 
-  template <typename T>
+  template<typename T>
   inline const T *data() const {
-    return static_cast<const T *>(raw_data());
+    MACE_CHECK(buffer_ != nullptr, "buffer is null");
+    return buffer_->data<T>();
   }
 
   inline void *raw_mutable_data() {
-    void *data = MappedBuffer();
-    MACE_CHECK(data != nullptr || size_ == 0,
-               "The tensor is of non-zero shape, but its data is not allocated "
-               "or mapped yet.");
-    return data;
+    MACE_CHECK(buffer_ != nullptr, "buffer is null");
+    return buffer_->raw_mutable_data();
   }
 
-  template <typename T>
+  template<typename T>
   inline T *mutable_data() {
-    return static_cast<T *>(raw_mutable_data());
+    MACE_CHECK(buffer_ != nullptr, "buffer is null");
+    return static_cast<T *>(buffer_->raw_mutable_data());
   }
 
-  inline void Resize(const std::vector<index_t> &shape) {
-    MACE_CHECK(!is_image_ || buffer_ == nullptr,
-               "Resize is not for image, use ResizeImage instead.");
-    is_image_ = false;
+  inline void Reshape(const std::vector<index_t> &shape) {
     shape_ = shape;
-    index_t size = NumElements();
-    if (size_ != size) {
-      size_ = size;
-      MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
-      CASES(dtype_,
-            (buffer_ =
-               std::move(std::unique_ptr<void, std::function<void(void *)>>(
-                 alloc_->New(size_ * sizeof(T)),
-                 [this](void *p) {
-                   this->alloc_->Delete(p);
-                 })
-               )));
-    }
+    MACE_CHECK(raw_size() <= buffer_->size());
   }
 
-  inline void ResizeImage(const std::vector<index_t> &shape,
-                          const std::vector<size_t> &image_shape) {
-    MACE_CHECK(is_image_ || buffer_ == nullptr,
-               "ResizeImage is not for buffer, use Resize instead.");
-    is_image_ = true;
+  inline void Resize(const std::vector<index_t> &shape) {
     shape_ = shape;
-    index_t size = NumElements();
-    if (size_ != size) {
-      size_ = size;
-      image_shape_ = image_shape;
-      if (!preallocated_image_shape_.empty()) {
-        MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
-                     && preallocated_image_shape_[1] >= image_shape[1],
-                   "image shape not large enough: preallocated ",
-                   preallocated_image_shape_[0],
-                   " ",
-                   preallocated_image_shape_[1],
-                   "apply for ",
-                   image_shape[0],
-                   " ",
-                   image_shape[1]);
-      } else {
-        buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
-          alloc_->NewImage(image_shape, dtype_),
-          [this](void *p) { this->alloc_->DeleteImage(p); }));
-        preallocated_image_shape_ = image_shape;
-      }
+    if (buffer_ != nullptr) {
+      buffer_->Resize(raw_size());
+    } else {
+      buffer_ = new Buffer(allocator_, raw_size());
+      is_buffer_owner_ = true;
     }
   }
 
   inline void ResizeLike(const Tensor &other) {
-    if (other.is_image()) {
-      ResizeImage(other.shape(), other.image_shape());
-    } else {
-      Resize(other.shape());
-    }
+    Resize(other.shape());
   }
 
   inline void ResizeLike(const Tensor *other) {
-    if (other->is_image()) {
-      ResizeImage(other->shape(), other->image_shape());
-    } else {
-      Resize(other->shape());
+    Resize(other->shape());
+  }
+
+  inline void ResizeImage(const std::vector<index_t> &shape,
+                          const std::vector<size_t> &image_shape) {
+    shape_ = shape;
+    if (buffer_ == nullptr) {
+      buffer_ = new Image(image_shape, dtype_);
+      is_buffer_owner_ = true;
     }
   }
 
-  inline void PreallocateImage(void *image,
-                               const std::vector<size_t>& image_shape) {
-    is_image_ = true;
-    buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
-      image, [](void *p) {
-        // tensor does not have ownership of preallocated memory
-      }));
-    preallocated_image_shape_ = image_shape;
+  inline void CopyBytes(const void *src, size_t size) {
+    MappingGuard guard(this);
+    memcpy(buffer_->raw_mutable_data(), src, size);
   }
 
-  template <typename T>
-  inline void Copy(const T *src, index_t size) {
-    MACE_CHECK(size == size_, "copy src and dst with different size.");
-    CopyBytes(static_cast<const void *>(src), sizeof(T) * size);
+  template<typename T>
+  inline void Copy(const T *src, index_t length) {
+    MACE_CHECK(length == size(), "copy src and dst with different size.");
+    CopyBytes(static_cast<const void *>(src), sizeof(T) * length);
   }
 
-  template <typename SrcType, typename DstType>
-  inline void CopyWithCast(const SrcType *src, size_t size) {
-    MACE_CHECK(static_cast<index_t>(size) == size_,
-               "copy src and dst with different size.");
-    std::unique_ptr<DstType[]> buffer(new DstType[size]);
-    for (size_t i = 0; i < size; ++i) {
-      buffer[i] = static_cast<DstType>(src[i]);
-    }
-    CopyBytes(static_cast<const void *>(buffer.get()), sizeof(DstType) * size);
+  inline void Copy(const Tensor &other) {
+    dtype_ = other.dtype_;
+    ResizeLike(other);
+    MappingGuard map_other(&other);
+    CopyBytes(other.raw_data(), other.size() * SizeOfType());
   }
 
-  inline void CopyBytes(const void *src, size_t size) {
-    MappingGuard map_this(this);
-    memcpy(raw_mutable_data(), src, size);
+  inline size_t SizeOfType() const {
+    size_t type_size = 0;
+    CASES(dtype_, type_size = sizeof(T));
+    return type_size;
+  }
+
+  inline BufferBase *UnderlyingBuffer() const {
+    return buffer_;
   }
 
   inline void DebugPrint() const {
@@ -281,8 +214,8 @@ class Tensor {
     os.str("");
     os.clear();
     MappingGuard guard(this);
-    for (int i = 0; i < size_; ++i) {
-      if ( i != 0 && i % shape_[3] == 0) {
+    for (int i = 0; i < size(); ++i) {
+      if (i != 0 && i % shape_[3] == 0) {
         os << "\n";
       }
       CASES(dtype_, (os << (this->data<T>()[i]) << ", "));
@@ -291,37 +224,11 @@ class Tensor {
               << dim(2) << ", " << dim(3) << "], content:\n" << os.str();
   }
 
-  inline size_t SizeOfType() const {
-    size_t type_size = 0;
-    CASES(dtype_, type_size = sizeof(T));
-    return type_size;
-  }
-
-  inline void Copy(const Tensor &other) {
-    alloc_ = other.alloc_;
-    dtype_ = other.dtype_;
-    ResizeLike(other);
-    MappingGuard map_other(&other);
-    if (is_image_) {
-      LOG(FATAL) << "Not support copy image tensor, please use Copy API.";
-    } else {
-      CopyBytes(other.raw_data(), size_ * SizeOfType());
-    }
-  }
-
-  inline void MarkUnused() {
-    this->unused_ = true;
-  }
-
   class MappingGuard {
    public:
     MappingGuard(const Tensor *tensor) : tensor_(tensor) {
       if (tensor_ != nullptr) {
-        if (tensor_->is_image()) {
-          tensor_->MapImage(mapped_image_pitch_);
-        } else {
-          tensor_->Map();
-        }
+        tensor_->buffer_->Map(&mapped_image_pitch_);
       }
     }
 
@@ -331,42 +238,29 @@ class Tensor {
     }
 
     ~MappingGuard() {
-      if (tensor_ != nullptr) tensor_->Unmap();
+      if (tensor_ != nullptr) tensor_->buffer_->UnMap();
     }
 
-    inline const std::vector<size_t> &mapped_image_pitch() const { return mapped_image_pitch_; }
+    inline const std::vector<size_t> &mapped_image_pitch() const {
+      return mapped_image_pitch_;
+    }
 
    private:
     const Tensor *tensor_;
     std::vector<size_t> mapped_image_pitch_;
 
-    DISABLE_COPY_AND_ASSIGN(MappingGuard);
+   DISABLE_COPY_AND_ASSIGN(MappingGuard);
   };
 
  private:
-  inline void *MappedBuffer() const {
-    if (OnHost()) {
-      return buffer_.get();
-    }
-    return data_;
-  }
-
-  Allocator *alloc_;
-  index_t size_;
+  Allocator *allocator_;
   DataType dtype_;
-  // Raw buffer, must be mapped as host accessable data before
-  // read or write
-  std::unique_ptr<void, std::function<void(void*)>> buffer_;
-  // Mapped buffer
-  mutable void *data_;
   std::vector<index_t> shape_;
-  // Image for opencl
-  bool unused_;
-  bool is_image_;
-  std::vector<size_t> image_shape_;
-  std::vector<size_t> preallocated_image_shape_;
+  BufferBase *buffer_;
+  BufferSlice buffer_slice_;
+  bool is_buffer_owner_;
 
-  DISABLE_COPY_AND_ASSIGN(Tensor);
+ DISABLE_COPY_AND_ASSIGN(Tensor);
 };
 
 }  // namespace tensor
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index 371ab2e0..a7adb939 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -6,21 +6,11 @@
 #include <vector>
 
 #include "mace/core/workspace.h"
-#include "mace/core/serializer.h"
 #include "mace/core/arg_helper.h"
-#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
 #include "mace/utils/timer.h"
 
 namespace mace {
 
-std::vector<std::string> Workspace::Tensors() const {
-  std::vector<std::string> names;
-  for (auto &entry : tensor_map_) {
-    names.push_back(entry.first);
-  }
-  return names;
-}
-
 Tensor *Workspace::CreateTensor(const std::string &name,
                                 Allocator *alloc,
                                 DataType type) {
@@ -28,21 +18,12 @@ Tensor *Workspace::CreateTensor(const std::string &name,
     VLOG(3) << "Tensor " << name << " already exists. Skipping.";
   } else {
     VLOG(3) << "Creating Tensor " << name;
-    tensor_map_[name] = std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
+    tensor_map_[name] =
+      std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
   }
   return GetTensor(name);
 }
 
-bool Workspace::RemoveTensor(const std::string &name) {
-  auto it = tensor_map_.find(name);
-  if (it != tensor_map_.end()) {
-    VLOG(3) << "Removing blob " << name << " from this workspace.";
-    tensor_map_.erase(it);
-    return true;
-  }
-  return false;
-}
-
 const Tensor *Workspace::GetTensor(const std::string &name) const {
   if (tensor_map_.count(name)) {
     return tensor_map_.at(name).get();
@@ -52,26 +33,51 @@ const Tensor *Workspace::GetTensor(const std::string &name) const {
   return nullptr;
 }
 
-
-void Workspace::RemoveUnsedTensor() {
-  auto iter = tensor_map_.begin();
-  auto end_iter = tensor_map_.end();
-  while(iter != end_iter) {
-    auto old_iter = iter++;
-    if(old_iter->second->unused()) {
-      tensor_map_.erase(old_iter);
-    }
-  }
-}
-
 Tensor *Workspace::GetTensor(const std::string &name) {
   return const_cast<Tensor *>(
-      static_cast<const Workspace *>(this)->GetTensor(name));
+    static_cast<const Workspace *>(this)->GetTensor(name));
+}
+
+std::vector<std::string> Workspace::Tensors() const {
+  std::vector<std::string> names;
+  for (auto &entry : tensor_map_) {
+    names.push_back(entry.first);
+  }
+  return names;
 }
 
 void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
   MACE_LATENCY_LOGGER(1, "Load model tensors");
-  Serializer serializer;
+  index_t model_data_size = 0;
+  unsigned char *model_data_ptr = nullptr;
+  for (auto &const_tensor : net_def.tensors()) {
+    if (model_data_ptr == nullptr
+      || reinterpret_cast<long long>(const_tensor.data())
+        < reinterpret_cast<long long>(model_data_ptr)) {
+      model_data_ptr = const_cast<unsigned char *>(const_tensor.data());
+    }
+  }
+  for (auto &const_tensor : net_def.tensors()) {
+    model_data_size = std::max(model_data_size,
+                               static_cast<index_t>(
+                                 (reinterpret_cast<long long>(const_tensor.data())
+                                   - reinterpret_cast<long long>(model_data_ptr))
+                                   + const_tensor.data_size()
+                                     * GetEnumTypeSize(const_tensor.data_type())));
+  }
+  VLOG(3) << "Model data size: " << model_data_size;
+
+  if (type == DeviceType::CPU) {
+    tensor_buffer_ = std::move(std::unique_ptr<Buffer>(
+      new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size)));
+  } else {
+    tensor_buffer_ = std::move(std::unique_ptr<Buffer>(
+      new Buffer(GetDeviceAllocator(type), model_data_size)));
+    tensor_buffer_->Map(nullptr);
+    tensor_buffer_->Copy(model_data_ptr, 0, model_data_size);
+    tensor_buffer_->UnMap();
+  }
+
   for (auto &const_tensor : net_def.tensors()) {
     MACE_LATENCY_LOGGER(2, "Load tensor ", const_tensor.name());
     VLOG(3) << "Tensor name: " << const_tensor.name()
@@ -79,9 +85,24 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
             << ", shape: "
             << MakeString(std::vector<index_t>(const_tensor.dims().begin(),
                                                const_tensor.dims().end()));
-    tensor_map_[const_tensor.name()] =
-        serializer.Deserialize(const_tensor, type);
+    std::vector<index_t> dims;
+    for (const index_t d : const_tensor.dims()) {
+      dims.push_back(d);
+    }
+
+    index_t
+      offset = (long long) const_tensor.data() - (long long) model_data_ptr;
+    std::unique_ptr<Tensor> tensor(
+      new Tensor(BufferSlice(tensor_buffer_.get(),
+                             offset,
+                             const_tensor.data_size()
+                               * GetEnumTypeSize(const_tensor.data_type())),
+                 const_tensor.data_type()));
+
+    tensor->Reshape(dims);
+    tensor_map_[const_tensor.name()] = std::move(tensor);
   }
+
   if (type == DeviceType::OPENCL) {
     CreateImageOutputTensor(net_def);
   }
@@ -91,9 +112,6 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
   if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
     return;
   }
-  preallocated_allocator_ =
-    std::move(std::unique_ptr<PreallocatedPooledAllocator>(
-      new OpenCLPreallocatedPooledAllocator));
 
   DataType dtype = DataType::DT_INVALID;
   // We use the data type of the first op (with mem id, must be image),
@@ -116,18 +134,16 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
   }
   MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
   for (auto &mem_block: net_def.mem_arena().mem_block()) {
-    preallocated_allocator_->PreallocateImage(mem_block.mem_id(),
-                                              {mem_block.x(), mem_block.y()},
-                                              dtype);
+    std::unique_ptr<BufferBase>
+      image_buf(new Image({mem_block.x(), mem_block.y()}, dtype));
+    preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf));
   }
   VLOG(3) << "Preallocate image to tensors";
-  auto allocator = GetDeviceAllocator(DeviceType::OPENCL);
   for (auto &op: net_def.op()) {
     if (op.has_mem_id()) {
-      CreateTensor(op.output(0), allocator, dtype);
-      tensor_map_[op.output(0)]->PreallocateImage(
-        preallocated_allocator_->GetImage(op.mem_id()),
-        preallocated_allocator_->GetImageSize(op.mem_id()));
+      std::unique_ptr<Tensor> tensor
+        (new Tensor(preallocated_allocator_.GetBuffer(op.mem_id()), dtype));
+      tensor_map_[op.output(0)] = std::move(tensor);
     }
   }
 }
diff --git a/mace/core/workspace.h b/mace/core/workspace.h
index c1bc17a5..84274914 100644
--- a/mace/core/workspace.h
+++ b/mace/core/workspace.h
@@ -15,26 +15,23 @@ class Workspace {
  public:
   typedef std::map<std::string, std::unique_ptr<Tensor>> TensorMap;
 
-  Workspace()
-    : preallocated_allocator_(nullptr) {}
+  Workspace() {}
   ~Workspace() {}
 
-  std::vector<std::string> Tensors() const;
-
-  Tensor *CreateTensor(const std::string &name, Allocator *alloc, DataType type);
-
-  bool RemoveTensor(const std::string &name);
-
-  void RemoveUnsedTensor();
+  Tensor *CreateTensor(const std::string &name,
+                       Allocator *alloc,
+                       DataType type);
 
   inline bool HasTensor(const std::string &name) const {
-    return tensor_map_.count(name);
+    return tensor_map_.find(name) != tensor_map_.end();
   }
 
   const Tensor *GetTensor(const std::string &name) const;
 
   Tensor *GetTensor(const std::string &name);
 
+  std::vector<std::string> Tensors() const;
+
   void LoadModelTensor(const NetDef &net_def, DeviceType type);
 
  private:
@@ -42,9 +39,11 @@ class Workspace {
 
   TensorMap tensor_map_;
 
-  std::unique_ptr<PreallocatedPooledAllocator> preallocated_allocator_;
+  std::unique_ptr<BufferBase> tensor_buffer_;
+
+  PreallocatedPooledAllocator preallocated_allocator_;
 
-  DISABLE_COPY_AND_ASSIGN(Workspace);
+ DISABLE_COPY_AND_ASSIGN(Workspace);
 };
 
 }  // namespace mace
diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h
index 0d489f40..bceee6ff 100644
--- a/mace/kernels/batch_norm.h
+++ b/mace/kernels/batch_norm.h
@@ -127,7 +127,7 @@ struct BatchNormFunctor : BatchNormFunctorBase {
         }
       }
     }
-    DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
+    DoActivation(output_ptr, output_ptr, output->size(), activation_,
                  relux_max_limit_);
   }
 };
diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h
index e6c22cd9..f4f49565 100644
--- a/mace/kernels/conv_2d.h
+++ b/mace/kernels/conv_2d.h
@@ -616,7 +616,7 @@ struct Conv2dFunctor : Conv2dFunctorBase {
         }
       }
     }
-    DoActivation(output_data, output_data, output->NumElements(), activation_,
+    DoActivation(output_data, output_data, output->size(), activation_,
                  relux_max_limit_);
   }
 };
diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h
index 141119a9..da4d00be 100644
--- a/mace/kernels/depthwise_conv2d.h
+++ b/mace/kernels/depthwise_conv2d.h
@@ -402,7 +402,7 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
         valid_h_stop, valid_w_start, valid_w_stop);
 
     output_ptr = output->mutable_data<T>();
-    DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
+    DoActivation(output_ptr, output_ptr, output->size(), activation_,
                  relux_max_limit_);
   }
 };
diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h
index 4919e63a..031717f1 100644
--- a/mace/kernels/fully_connected.h
+++ b/mace/kernels/fully_connected.h
@@ -65,7 +65,7 @@ struct FullyConnectedFunctor : FullyConnectedBase {
       }
     }
 
-    DoActivation(output_ptr, output_ptr, output->NumElements(), activation_,
+    DoActivation(output_ptr, output_ptr, output->size(), activation_,
                  relux_max_limit_);
   }
 };
diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc
index f9d9e781..bc906163 100644
--- a/mace/kernels/opencl/buffer_to_image.cc
+++ b/mace/kernels/opencl/buffer_to_image.cc
@@ -14,7 +14,6 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
                                                              const BufferType type,
                                                              Tensor *image,
                                                              StatsFuture *future) {
-  MACE_CHECK(!buffer->is_image()) << "buffer must be buffer-type";
   std::vector<size_t> image_shape;
   if (!i2b_) {
     CalImage2DShape(buffer->shape(), type, image_shape);
@@ -25,9 +24,9 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
     } else {
       image->ResizeImage(buffer->shape(), image_shape);
     }
-    buffer->MarkUnused();
   } else {
-    image_shape = image->image_shape();
+    Image *image_buf = dynamic_cast<Image*>(image->UnderlyingBuffer());
+    image_shape = image_buf->image_shape();
     buffer->Resize(image->shape());
   }
 
@@ -78,7 +77,11 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
                                          built_options);
 
   uint32_t idx = 0;
-  b2f_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(buffer->buffer())));
+  b2f_kernel.setArg(idx++, *(static_cast<const cl::Buffer *>(buffer->buffer())));
+  if (!i2b_) {
+    MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned");
+    b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype())));
+  }
   if (type == ARGUMENT) {
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(0)));
   } else if(type == WEIGHT_HEIGHT) {
diff --git a/mace/kernels/opencl/cl/buffer_to_image.cl b/mace/kernels/opencl/cl/buffer_to_image.cl
index d4a28e6e..42ab6617 100644
--- a/mace/kernels/opencl/cl/buffer_to_image.cl
+++ b/mace/kernels/opencl/cl/buffer_to_image.cl
@@ -1,6 +1,7 @@
 #include <common.h>
 
 __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, oc, ic */
+                                     __private const int input_offset,
                                      __private const int filter_w,
                                      __private const int out_channel,
                                      __private const int in_channel,
@@ -13,7 +14,7 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o
   const int in_channel_idx = w % rounded_in_channel;
   const int h_idx = hw_idx / filter_w;
   const int w_idx = hw_idx % filter_w;
-  const int offset = ((h_idx * filter_w + w_idx) * out_channel + out_channel_idx) * in_channel
+  const int offset = input_offset + ((h_idx * filter_w + w_idx) * out_channel + out_channel_idx) * in_channel
                            + in_channel_idx;
 
   VEC_DATA_TYPE(DATA_TYPE, 4) values = 0;
@@ -79,6 +80,7 @@ __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic
 }
 
 __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, ic, m */
+                                        __private const int input_offset,
                                         __private const int filter_w,
                                         __private const int in_channel,
                                         __private const int multiplier,
@@ -92,7 +94,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
     const int h_idx = w / filter_w;
     const int w_idx = w % filter_w;
 
-    const int offset = mad24(mad24(h_idx, filter_w, w_idx),
+    const int offset = input_offset + mad24(mad24(h_idx, filter_w, w_idx),
                              in_channel, in_channel_idx);
 
     const int size = in_channel - in_channel_idx;
@@ -117,7 +119,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
     const int h_idx = hw_idx / filter_w;
     const int w_idx = hw_idx % filter_w;
 
-    const int offset = mad24(mad24(mad24(h_idx, filter_w, w_idx),
+    const int offset = input_offset + mad24(mad24(mad24(h_idx, filter_w, w_idx),
                 in_channel, in_channel_idx),
             multiplier, m);
     // TODO support multiplier > 1
@@ -128,6 +130,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
 }
 
 __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
+                                     __private const int input_offset,
                                      __private const int height,
                                      __private const int width,
                                      __private const int channels,
@@ -138,7 +141,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
   const int height_idx = h % height;
   const int width_idx = w % width;
   const int channel_idx = w / width * 4;
-  const int offset = ((batch_idx * height + height_idx) * width + width_idx) * channels
+  const int offset = input_offset + ((batch_idx * height + height_idx) * width + width_idx) * channels
                            + channel_idx;
 
   const int size = channels - channel_idx;
@@ -191,13 +194,16 @@ __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
 }
 
 __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
+                                  __private const int input_offset,
                                   __private const int count,
                                   __write_only image2d_t output) {
   int w = get_global_id(0);
   int h = get_global_id(1);
-  const int offset = w * 4;
 
-  const int size = count - offset;
+  const int offset = input_offset + w * 4;
+  const int size = count - w * 4;
+
+
   VEC_DATA_TYPE(DATA_TYPE, 4) values = 0;
   if (size < 4) {
     switch(size) {
@@ -241,6 +247,7 @@ __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
 
 
 __kernel void in_out_height_buffer_to_image(__global const DATA_TYPE *input, //nhwc
+                                            __private const int input_offset,
                                             __private const int height,
                                             __private const int width,
                                             __private const int channels,
@@ -253,7 +260,7 @@ __kernel void in_out_height_buffer_to_image(__global const DATA_TYPE *input, //n
   const int height_idx = (h % height_blks) << 2;
   const int width_idx = w % width;
   const int channel_idx = w / width;
-  int offset = ((batch_idx * height + height_idx) * width + width_idx) * channels
+  int offset = input_offset + ((batch_idx * height + height_idx) * width + width_idx) * channels
       + channel_idx;
 
   int size = height - height_idx;
@@ -304,6 +311,7 @@ __kernel void in_out_height_image_to_buffer(__global DATA_TYPE *output, //nhwc
 
 
 __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
+                                           __private const int input_offset,
                                            __private const int height,
                                            __private const int width,
                                            __private const int channels,
@@ -314,7 +322,7 @@ __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* n
   const int height_idx = h % height;
   const int width_idx = (w % width) << 2;
   const int channel_idx = w / width;
-  const int offset = ((batch_idx * height + height_idx) * width + width_idx) * channels
+  const int offset = input_offset + ((batch_idx * height + height_idx) * width + width_idx) * channels
       + channel_idx;
 
   int size = width - width_idx;
@@ -336,6 +344,7 @@ __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* n
 
 // only support 3x3 now
 __kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, //Oc, Ic, H, W
+                                              __private const int input_offset,
                                               __private const int in_channels,
                                               __private const int height,
                                               __private const int width,
@@ -345,7 +354,7 @@ __kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, /
   const int out_channels = get_global_size(1);
   const int out_channel_idx = h;
   const int in_channel_idx = w << 2;
-  const int offset = (out_channel_idx * in_channels + in_channel_idx) * height * width;
+  const int offset = input_offset + (out_channel_idx * in_channels + in_channel_idx) * height * width;
   const int length = min((in_channels - in_channel_idx) * 9, 36);
   DATA_TYPE in[36] = {0};
   DATA_TYPE4 tt;
@@ -390,7 +399,7 @@ __kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, /
   tu3[3] = tu3[2];
   tu3[2] = tt - tu3[1] / 2;
   tu3[1] = tt + tu3[1] / 2;
-  
+
   int2 coord = (int2)(w, h);
 #pragma unroll
   for (short i = 0; i < 4; ++i) {
diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc
index 94aa7838..5dc97944 100644
--- a/mace/kernels/opencl/conv_2d_opencl.cc
+++ b/mace/kernels/opencl/conv_2d_opencl.cc
@@ -65,13 +65,12 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
 
   index_t kernel_h = filter->dim(0);
   index_t kernel_w = filter->dim(1);
-  if (!input->is_image() || strides_[0] != strides_[1] ||
+  if (strides_[0] != strides_[1] ||
       (dilations_[0] > 1 && (strides_[0] > 1 || kernel_h == 1))) {
     LOG(WARNING) << "OpenCL conv2d kernel with "
                  << "filter" << kernel_h << "x" << kernel_w << ","
                  << " stride " << strides_[0] << "x" << strides_[1]
                  << ",dilations " << dilations_[0] << "x" << dilations_[1]
-                 << " and input image: " << input->is_image()
                  << " is not implemented yet.";
     MACE_NOT_IMPLEMENTED;
   }
diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc
index f4e91045..d8f4185e 100644
--- a/mace/kernels/opencl/resize_bilinear_opencl.cc
+++ b/mace/kernels/opencl/resize_bilinear_opencl.cc
@@ -27,13 +27,12 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
   if (kernel_.get() == nullptr) {
     MACE_CHECK(out_height > 0 && out_width > 0);
     std::vector<index_t> output_shape{batch, out_height, out_width, channels};
-    if (input->is_image()) {
-      std::vector<size_t> output_image_shape;
-      CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape);
-      output->ResizeImage(output_shape, output_image_shape);
-    } else {
-      output->Resize(output_shape);
-    }
+
+    std::vector<size_t> output_image_shape;
+    CalImage2DShape(output_shape,
+                    BufferType::IN_OUT_CHANNEL,
+                    output_image_shape);
+    output->ResizeImage(output_shape, output_image_shape);
 
     float height_scale =
         CalculateResizeScale(in_height, out_height, align_corners_);
-- 
GitLab