From 358ebd4d86c44bdf03bed9d51be15d353eda9f49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?= <liyinhgqw@gmail.com>
Date: Tue, 6 Mar 2018 11:47:41 +0800
Subject: [PATCH] 1. Fix ResizeLike. 2. Distinguish opencl buffer and image

---
 mace/core/buffer.h                            | 14 ++++-
 mace/core/tensor.h                            | 58 +++++++++++++++----
 mace/kernels/opencl/activation_opencl.cc      |  6 +-
 mace/kernels/opencl/addn.cc                   |  4 +-
 mace/kernels/opencl/batch_norm_opencl.cc      | 12 ++--
 mace/kernels/opencl/bias_add_opencl.cc        |  6 +-
 mace/kernels/opencl/buffer_to_image.cc        |  5 +-
 mace/kernels/opencl/concat.cc                 | 10 ++--
 mace/kernels/opencl/conv_2d_opencl_1x1.cc     |  8 +--
 mace/kernels/opencl/conv_2d_opencl_3x3.cc     |  8 +--
 mace/kernels/opencl/conv_2d_opencl_general.cc |  8 +--
 mace/kernels/opencl/depthwise_conv_opencl.cc  |  9 ++-
 mace/kernels/opencl/eltwise_opencl.cc         |  6 +-
 mace/kernels/opencl/fully_connected_opencl.cc |  8 +--
 mace/kernels/opencl/matmul.cc                 |  7 +--
 mace/kernels/opencl/pooling_opencl.cc         |  4 +-
 mace/kernels/opencl/resize_bilinear_opencl.cc |  4 +-
 mace/kernels/opencl/softmax_opencl.cc         |  4 +-
 mace/kernels/opencl/space_to_batch_opencl.cc  |  8 +--
 mace/kernels/opencl/winograd_transform.cc     | 10 ++--
 20 files changed, 119 insertions(+), 80 deletions(-)

diff --git a/mace/core/buffer.h b/mace/core/buffer.h
index bada99c0..c17c4a1d 100644
--- a/mace/core/buffer.h
+++ b/mace/core/buffer.h
@@ -241,7 +241,9 @@ class Image : public BufferBase {
     mapped_buf_ = nullptr;
   };
 
-  void Resize(index_t size) {}
+  void Resize(index_t size) {  // unsupported for Image; was a silent no-op before
+    MACE_NOT_IMPLEMENTED;
+  }
 
   void Copy(void *src, index_t offset, index_t length) {
     MACE_NOT_IMPLEMENTED;
@@ -263,7 +265,11 @@ class Image : public BufferBase {
 
 class BufferSlice : public BufferBase {
  public:
-  BufferSlice() {}
+  BufferSlice()  // empty slice: no parent buffer, nothing mapped, zero offset/length
+    : buffer_(nullptr),
+      mapped_buf_(nullptr),
+      offset_(0),
+      length_(0) {}
   BufferSlice(BufferBase *buffer, index_t offset, index_t length)
     : BufferBase(buffer->size()),
       buffer_(buffer),
@@ -284,12 +290,13 @@ class BufferSlice : public BufferBase {
                                                       other.length_) {}
 
   ~BufferSlice() {
-    if (mapped_buf_ != nullptr) {
+    if (buffer_ != nullptr && mapped_buf_ != nullptr) {  // unmap only with a parent buffer and a live mapping
       UnMap();
     }
   }
 
   void *buffer() {
+    MACE_CHECK_NOTNULL(buffer_);  // buffer_ is nullptr for a default-constructed slice
     return buffer_->buffer();
   };
 
@@ -330,6 +337,7 @@ class BufferSlice : public BufferBase {
   };
 
   void Resize(index_t size) {
+    MACE_NOT_IMPLEMENTED;  // resizing a slice is unsupported; was a silent no-op before
   }
 
   void Copy(void *src, index_t offset, index_t length) {
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index cfe832ed..47fa3d11 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -5,6 +5,7 @@
 #ifndef MACE_CORE_TENSOR_H_
 #define MACE_CORE_TENSOR_H_
 
+#include "mace/core/runtime/opencl/cl2.hpp"
 #include "mace/core/buffer.h"
 #include "mace/utils/logging.h"
 #include "mace/core/types.h"
@@ -112,10 +113,24 @@ class Tensor {
     return size() * SizeOfType();
   }
 
-  inline void *buffer() const {
-    MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr,
-               "buffer is null");
-    return buffer_->buffer();
+  inline bool has_opencl_image() const {  // non-host storage whose dynamic type is exactly Image
+    return buffer_ != nullptr && !buffer_->OnHost()
+      && typeid(*buffer_) == typeid(Image);
+  }
+
+  inline bool has_opencl_buffer() const {  // non-host storage that is not an Image
+    return buffer_ != nullptr && !buffer_->OnHost()
+      && !has_opencl_image();
+  }
+
+  inline cl::Image *opencl_image() const {  // valid only for image-backed tensors
+    MACE_CHECK(has_opencl_image() && buffer_->buffer() != nullptr, "do not have image");
+    return static_cast<cl::Image*>(buffer_->buffer());  // non-null: checked above, as the removed buffer() did
+  }
+
+  inline cl::Buffer *opencl_buffer() const {  // valid only for non-host, non-image tensors
+    MACE_CHECK(has_opencl_buffer() && buffer_->buffer() != nullptr, "do not have opencl buffer");
+    return static_cast<cl::Buffer*>(buffer_->buffer());  // non-null: checked above, as the removed buffer() did
   }
 
   inline index_t buffer_offset() const {
@@ -152,6 +167,7 @@ class Tensor {
   inline void Resize(const std::vector<index_t> &shape) {
     shape_ = shape;
     if (buffer_ != nullptr) {
+      MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
       buffer_->Resize(raw_size());
     } else {
       buffer_ = new Buffer(allocator_, raw_size());
@@ -159,20 +175,38 @@ class Tensor {
     }
   }
 
-  inline void ResizeLike(const Tensor &other) {
-    Resize(other.shape());
-  }
-
-  inline void ResizeLike(const Tensor *other) {
-    Resize(other->shape());
-  }
-
   inline void ResizeImage(const std::vector<index_t> &shape,
                           const std::vector<size_t> &image_shape) {
     shape_ = shape;
     if (buffer_ == nullptr) {
       buffer_ = new Image(image_shape, dtype_);
       is_buffer_owner_ = true;
+    } else {  // existing storage is only validated here, never reallocated
+      MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize.");
+      Image *image = dynamic_cast<Image*>(buffer_);
+      MACE_CHECK(image_shape[0] <= image->image_shape()[0]
+                   && image_shape[1] <= image->image_shape()[1]);  // requested extent must fit the existing image
+    }
+  }
+
+  inline void ResizeLike(const Tensor &other) {  // reference overload; forwards to the pointer overload
+    ResizeLike(&other);
+  }
+
+  inline void ResizeLike(const Tensor *other) {  // take other's shape and storage kind (image vs. buffer)
+    if (other->has_opencl_image()) {
+      if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) {
+        delete buffer_;  // owned non-image storage: drop it so ResizeImage allocates an Image
+        buffer_ = nullptr;
+      }
+      ResizeImage(other->shape(),
+                  dynamic_cast<Image *>(other->UnderlyingBuffer())->image_shape());
+    } else {
+      if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) {
+        delete buffer_;  // owned Image: drop it so Resize allocates a Buffer
+        buffer_ = nullptr;
+      }
+      Resize(other->shape());
     }
   }
 
diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc
index dee01087..99b8a6bc 100644
--- a/mace/kernels/opencl/activation_opencl.cc
+++ b/mace/kernels/opencl/activation_opencl.cc
@@ -60,12 +60,12 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_ =
         runtime->BuildKernel("activation", kernel_name, built_options);
     int idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
     if (activation_ == PRELU) {
-      kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(alpha->buffer())));
+      kernel_.setArg(idx++, *(alpha->opencl_image()));
     }
     kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
 
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc
index 3495ddca..38388081 100644
--- a/mace/kernels/opencl/addn.cc
+++ b/mace/kernels/opencl/addn.cc
@@ -58,9 +58,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
     uint32_t idx = 0;
     for (auto input : input_tensors) {
       kernel_.setArg(idx++,
-                         *(static_cast<const cl::Image2D *>(input->buffer())));
+                         *(input->opencl_image()));
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(output_tensor->opencl_image()));
   }
 
   const uint32_t gws[2] = {
diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc
index 7696e875..571bdd53 100644
--- a/mace/kernels/opencl/batch_norm_opencl.cc
+++ b/mace/kernels/opencl/batch_norm_opencl.cc
@@ -64,17 +64,17 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
         runtime->BuildKernel("batch_norm", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(scale->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(scale->opencl_image()));
     kernel_.setArg(idx++,
-                     *(static_cast<const cl::Image2D *>(offset->buffer())));
+                     *(offset->opencl_image()));
     if (!folded_constant_) {
       kernel_.setArg(idx++,
-                       *(static_cast<const cl::Image2D *>(mean->buffer())));
-      kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(var->buffer())));
+                       *(mean->opencl_image()));
+      kernel_.setArg(idx++, *(var->opencl_image()));
       kernel_.setArg(idx++, epsilon);
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
     kernel_.setArg(idx++, relux_max_limit_);
   }
 
diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc
index 84eff1bf..c8507433 100644
--- a/mace/kernels/opencl/bias_add_opencl.cc
+++ b/mace/kernels/opencl/bias_add_opencl.cc
@@ -35,9 +35,9 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(bias->opencl_image()));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
 
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc
index bc906163..19be430f 100644
--- a/mace/kernels/opencl/buffer_to_image.cc
+++ b/mace/kernels/opencl/buffer_to_image.cc
@@ -77,7 +77,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
                                          built_options);
 
   uint32_t idx = 0;
-  b2f_kernel.setArg(idx++, *(static_cast<const cl::Buffer *>(buffer->buffer())));
+  b2f_kernel.setArg(idx++, *(buffer->opencl_buffer()));
   if (!i2b_) {
     MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned");
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype())));
@@ -93,8 +93,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
   }
-  b2f_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(image->buffer())));
-
+  b2f_kernel.setArg(idx++, *(image->opencl_image()));
   const std::vector<uint32_t> lws = {16, 64};
   cl::Event event;
   cl_int error = runtime->command_queue().enqueueNDRangeKernel(
diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc
index 686e3a7a..48466e6a 100644
--- a/mace/kernels/opencl/concat.cc
+++ b/mace/kernels/opencl/concat.cc
@@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel,
     *kernel = runtime->BuildKernel("concat", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input0->buffer())));
-    kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input1->buffer())));
+    kernel->setArg(idx++, *(input0->opencl_image()));
+    kernel->setArg(idx++, *(input1->opencl_image()));
     kernel->setArg(idx++, static_cast<int32_t>(input0->dim(3)));
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel->setArg(idx++, *(output->opencl_image()));
   }
 
   const uint32_t gws[3] = {
@@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel,
   for (int i = 0; i < inputs_count; ++i) {
     const Tensor *input = input_list[i];
     uint32_t idx = 0;
-    kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
+    kernel->setArg(idx++, *(input->opencl_image()));
     kernel->setArg(idx++, static_cast<int32_t>(chan_blk_offset));
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel->setArg(idx++, *(output->opencl_image()));
 
     index_t input_channel_blk = input->dim(3) / 4;
     chan_blk_offset += input_channel_blk;
diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
index bee0e12a..b370b32b 100644
--- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
@@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
 
     uint32_t idx = 0;
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(input->buffer())));
+                          *(input->opencl_image()));
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(filter->buffer())));
+                          *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                            *(static_cast<const cl::Image2D *>(bias->buffer())));
+                            *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(output->buffer())));
+                          *(output->opencl_image()));
     // FIXME handle flexable data type: half not supported
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input_height));
diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
index bb677177..a7eb668d 100644
--- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
@@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
 
     uint32_t idx = 0;
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(input->buffer())));
+                          *(input->opencl_image()));
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(filter->buffer())));
+                          *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                            *(static_cast<const cl::Image2D *>(bias->buffer())));
+                            *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(output->buffer())));
+                          *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input->dim(1)));
     kernel->setArg(idx++, static_cast<int>(input->dim(2)));
diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc
index af344c28..5f3ffa5e 100644
--- a/mace/kernels/opencl/conv_2d_opencl_general.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_general.cc
@@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
 
     uint32_t idx = 0;
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(input->buffer())));
+                          *(input->opencl_image()));
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(filter->buffer())));
+                          *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                            *(static_cast<const cl::Image2D *>(bias->buffer())));
+                            *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                          *(static_cast<const cl::Image2D *>(output->buffer())));
+                          *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
     kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc
index 2942c5d0..3bbd4f43 100644
--- a/mace/kernels/opencl/depthwise_conv_opencl.cc
+++ b/mace/kernels/opencl/depthwise_conv_opencl.cc
@@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel,
     *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel->setArg(idx++,
-                            *(static_cast<const cl::Image2D *>(input->buffer())));
+    kernel->setArg(idx++, *(input->opencl_image()));
     kernel->setArg(
-        idx++, *(static_cast<const cl::Image2D *>(filter->buffer())));
+        idx++, *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(
-          idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
+          idx++, *(bias->opencl_image()));
     }
     kernel->setArg(
-        idx++, *(static_cast<const cl::Image2D *>(output->buffer())));
+        idx++, *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<short>(input_height));
     kernel->setArg(idx++, static_cast<short>(input_width));
diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc
index e49a36b2..8c589c2f 100644
--- a/mace/kernels/opencl/eltwise_opencl.cc
+++ b/mace/kernels/opencl/eltwise_opencl.cc
@@ -39,14 +39,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
 
     uint32_t idx = 0;
     kernel_.setArg(idx++,
-                   *(static_cast<const cl::Image2D *>(input0->buffer())));
+                   *(input0->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<const cl::Image2D *>(input1->buffer())));
+                   *(input1->opencl_image()));
     if (!coeff_.empty()) {
       kernel_.setArg(idx++, coeff_[0]);
       kernel_.setArg(idx++, coeff_[1]);
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
 
   const uint32_t gws[2] = {
diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc
index 33e26eca..4a4eacc1 100644
--- a/mace/kernels/opencl/fully_connected_opencl.cc
+++ b/mace/kernels/opencl/fully_connected_opencl.cc
@@ -61,15 +61,15 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
 
     uint32_t idx = 0;
     kernel_.setArg(idx++,
-                   *(static_cast<const cl::Image2D *>(input->buffer())));
+                   *(input->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<const cl::Image2D *>(weight->buffer())));
+                   *(weight->opencl_image()));
     if (bias != nullptr) {
       kernel_.setArg(idx++,
-                     *(static_cast<const cl::Image2D *>(bias->buffer())));
+                     *(bias->opencl_image()));
     }
     kernel_.setArg(idx++,
-                   *(static_cast<const cl::Image2D *>(output->buffer())));
+                   *(output->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(input->dim(1)));
     kernel_.setArg(idx++, static_cast<int>(input->dim(2)));
     kernel_.setArg(idx++, static_cast<int>(input->dim(3)));
diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc
index c7f61849..77560853 100644
--- a/mace/kernels/opencl/matmul.cc
+++ b/mace/kernels/opencl/matmul.cc
@@ -40,11 +40,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options);
 
     uint32_t idx = 0;
+    kernel_.setArg(idx++, *(A->opencl_image()));
     kernel_.setArg(idx++,
-                         *(static_cast<const cl::Image2D *>(A->buffer())));
-    kernel_.setArg(idx++,
-                         *(static_cast<const cl::Image2D *>(B->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(C->buffer())));
+                         *(B->opencl_image()));
+    kernel_.setArg(idx++, *(C->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(height));
     kernel_.setArg(idx++, static_cast<int>(width));
     kernel_.setArg(idx++, static_cast<int>(A->dim(2)));
diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc
index 2ec0e084..1272a4fb 100644
--- a/mace/kernels/opencl/pooling_opencl.cc
+++ b/mace/kernels/opencl/pooling_opencl.cc
@@ -65,7 +65,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
     kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
     kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
     kernel_.setArg(idx++, static_cast<int32_t>(out_height));
@@ -73,7 +73,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_.setArg(idx++, paddings[1] / 2);
     kernel_.setArg(idx++, strides_[0]);
     kernel_.setArg(idx++, kernels_[0]);
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
 
   const uint32_t gws[3] = {
diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc
index d8f4185e..5761d3cb 100644
--- a/mace/kernels/opencl/resize_bilinear_opencl.cc
+++ b/mace/kernels/opencl/resize_bilinear_opencl.cc
@@ -48,8 +48,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(output->opencl_image()));
     kernel_.setArg(idx++, height_scale);
     kernel_.setArg(idx++, width_scale);
     kernel_.setArg(idx++, static_cast<int32_t>(in_height));
diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc
index 55a48775..a3336aa6 100644
--- a/mace/kernels/opencl/softmax_opencl.cc
+++ b/mace/kernels/opencl/softmax_opencl.cc
@@ -35,10 +35,10 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
     kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(logits->buffer())));
+    kernel_.setArg(idx++, *(logits->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(channels));
     kernel_.setArg(idx++, remain_channels);
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
                            static_cast<uint32_t>(width),
diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc
index 5940f4d3..2eb06027 100644
--- a/mace/kernels/opencl/space_to_batch_opencl.cc
+++ b/mace/kernels/opencl/space_to_batch_opencl.cc
@@ -42,11 +42,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
 
     uint32_t idx = 0;
     if (b2s_) {
-      kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(batch_tensor->buffer())));
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(space_tensor->buffer())));
+      kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
+      kernel_.setArg(idx++, *(space_tensor->opencl_image()));
     } else {
-      kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(space_tensor->buffer())));
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(batch_tensor->buffer())));
+      kernel_.setArg(idx++, *(space_tensor->opencl_image()));
+      kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
     }
     kernel_.setArg(idx++, block_shape_[0]);
     kernel_.setArg(idx++, block_shape_[1]);
diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc
index 54511220..8fd17f21 100644
--- a/mace/kernels/opencl/winograd_transform.cc
+++ b/mace/kernels/opencl/winograd_transform.cc
@@ -49,8 +49,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
                                    built_options);
 
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(input_tensor->opencl_image()));
+    kernel_.setArg(idx++, *(output_tensor->opencl_image()));
     kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(1)));
     kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(2)));
     kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(3)));
@@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
     const uint32_t round_h = (height_ + 1) / 2;
     const uint32_t round_w = (width_ + 1) / 2;
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer())));
+    kernel_.setArg(idx++, *(input_tensor->opencl_image()));
     if (bias != nullptr) {
-      kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
+      kernel_.setArg(idx++, *(bias->opencl_image()));
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(output_tensor->opencl_image()));
     kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[1]));
     kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[2]));
     kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
-- 
GitLab