From 358ebd4d86c44bdf03bed9d51be15d353eda9f49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?= Date: Tue, 6 Mar 2018 11:47:41 +0800 Subject: [PATCH] 1. Fix ResizeLike. 2. Distinguish opencl buffer and image --- mace/core/buffer.h | 14 ++++- mace/core/tensor.h | 58 +++++++++++++++---- mace/kernels/opencl/activation_opencl.cc | 6 +- mace/kernels/opencl/addn.cc | 4 +- mace/kernels/opencl/batch_norm_opencl.cc | 12 ++-- mace/kernels/opencl/bias_add_opencl.cc | 6 +- mace/kernels/opencl/buffer_to_image.cc | 5 +- mace/kernels/opencl/concat.cc | 10 ++-- mace/kernels/opencl/conv_2d_opencl_1x1.cc | 8 +-- mace/kernels/opencl/conv_2d_opencl_3x3.cc | 8 +-- mace/kernels/opencl/conv_2d_opencl_general.cc | 8 +-- mace/kernels/opencl/depthwise_conv_opencl.cc | 9 ++- mace/kernels/opencl/eltwise_opencl.cc | 6 +- mace/kernels/opencl/fully_connected_opencl.cc | 8 +-- mace/kernels/opencl/matmul.cc | 7 +-- mace/kernels/opencl/pooling_opencl.cc | 4 +- mace/kernels/opencl/resize_bilinear_opencl.cc | 4 +- mace/kernels/opencl/softmax_opencl.cc | 4 +- mace/kernels/opencl/space_to_batch_opencl.cc | 8 +-- mace/kernels/opencl/winograd_transform.cc | 10 ++-- 20 files changed, 119 insertions(+), 80 deletions(-) diff --git a/mace/core/buffer.h b/mace/core/buffer.h index bada99c0..c17c4a1d 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -241,7 +241,9 @@ class Image : public BufferBase { mapped_buf_ = nullptr; }; - void Resize(index_t size) {} + void Resize(index_t size) { + MACE_NOT_IMPLEMENTED; + } void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; @@ -263,7 +265,11 @@ class Image : public BufferBase { class BufferSlice : public BufferBase { public: - BufferSlice() {} + BufferSlice() + : buffer_(nullptr), + mapped_buf_(nullptr), + offset_(0), + length_(0) {} BufferSlice(BufferBase *buffer, index_t offset, index_t length) : BufferBase(buffer->size()), buffer_(buffer), @@ -284,12 +290,13 @@ class BufferSlice : public BufferBase { other.length_) {} ~BufferSlice() { - if (mapped_buf_ != nullptr) { + if (buffer_ != nullptr && mapped_buf_ != nullptr) { UnMap(); } } void *buffer() { + MACE_CHECK_NOTNULL(buffer_); return buffer_->buffer(); }; @@ -330,6 +337,7 @@ class BufferSlice : public BufferBase { }; void Resize(index_t size) { + MACE_NOT_IMPLEMENTED; } void Copy(void *src, index_t offset, index_t length) { diff --git a/mace/core/tensor.h b/mace/core/tensor.h index cfe832ed..47fa3d11 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -5,6 +5,7 @@ #ifndef MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_ +#include "mace/core/runtime/opencl/cl2.hpp" #include "mace/core/buffer.h" #include "mace/utils/logging.h" #include "mace/core/types.h" @@ -112,10 +113,24 @@ class Tensor { return size() * SizeOfType(); } - inline void *buffer() const { - MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr, - "buffer is null"); - return buffer_->buffer(); + inline bool has_opencl_image() const { + return buffer_ != nullptr && !buffer_->OnHost() + && typeid(*buffer_) == typeid(Image); + } + + inline bool has_opencl_buffer() const { + return buffer_ != nullptr && !buffer_->OnHost() + && !has_opencl_image(); + } + + inline cl::Image *opencl_image() const { + MACE_CHECK(has_opencl_image(), "do not have image"); + return static_cast(buffer_->buffer()); + } + + inline cl::Buffer *opencl_buffer() const { + MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer"); + return static_cast(buffer_->buffer()); } inline index_t buffer_offset() const { @@ -152,6 +167,7 @@ class Tensor { inline void Resize(const std::vector &shape) { shape_ = shape; if (buffer_ != nullptr) { + MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); buffer_->Resize(raw_size()); } else { buffer_ = new Buffer(allocator_, raw_size()); @@ -159,20 +175,38 @@ class Tensor { } } - inline void ResizeLike(const Tensor &other) { - Resize(other.shape()); - } - - inline void ResizeLike(const Tensor *other) { - Resize(other->shape()); - } - inline void ResizeImage(const std::vector &shape, const std::vector &image_shape) { shape_ = shape; if (buffer_ == nullptr) { buffer_ = new Image(image_shape, dtype_); is_buffer_owner_ = true; + } else { + MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); + Image *image = dynamic_cast(buffer_); + MACE_CHECK(image_shape[0] <= image->image_shape()[0] + && image_shape[1] <= image->image_shape()[1]); + } + } + + inline void ResizeLike(const Tensor &other) { + ResizeLike(&other); + } + + inline void ResizeLike(const Tensor *other) { + if (other->has_opencl_image()) { + if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) { + delete buffer_; + buffer_ = nullptr; + } + ResizeImage(other->shape(), + dynamic_cast(other->UnderlyingBuffer())->image_shape()); + } else { + if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) { + delete buffer_; + buffer_ = nullptr; + } + Resize(other->shape()); } } diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc index dee01087..99b8a6bc 100644 --- a/mace/kernels/opencl/activation_opencl.cc +++ b/mace/kernels/opencl/activation_opencl.cc @@ -60,12 +60,12 @@ void ActivationFunctor::operator()(const Tensor *input, kernel_ = runtime->BuildKernel("activation", kernel_name, built_options); int idx = 0; - kernel_.setArg(idx++, *(static_cast(input->buffer()))); + kernel_.setArg(idx++, *(input->opencl_image())); if (activation_ == PRELU) { - kernel_.setArg(idx++, *(static_cast(alpha->buffer()))); + kernel_.setArg(idx++, *(alpha->opencl_image())); } kernel_.setArg(idx++, static_cast(relux_max_limit_)); - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(output->opencl_image())); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 3495ddca..38388081 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -58,9 +58,9 @@ void AddNFunctor::operator()( uint32_t idx = 0; for (auto input : input_tensors) { kernel_.setArg(idx++, - *(static_cast(input->buffer()))); + *(input->opencl_image())); } - kernel_.setArg(idx++, *(static_cast(output_tensor->buffer()))); + kernel_.setArg(idx++, *(output_tensor->opencl_image())); } const uint32_t gws[2] = { diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index 7696e875..571bdd53 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -64,17 +64,17 @@ void BatchNormFunctor::operator()(const Tensor *input, runtime->BuildKernel("batch_norm", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input->buffer()))); - kernel_.setArg(idx++, *(static_cast(scale->buffer()))); + kernel_.setArg(idx++, *(input->opencl_image())); + kernel_.setArg(idx++, *(scale->opencl_image())); kernel_.setArg(idx++, - *(static_cast(offset->buffer()))); + *(offset->opencl_image())); if (!folded_constant_) { kernel_.setArg(idx++, - *(static_cast(mean->buffer()))); - kernel_.setArg(idx++, *(static_cast(var->buffer()))); + *(mean->opencl_image())); + kernel_.setArg(idx++, *(var->opencl_image())); kernel_.setArg(idx++, epsilon); } - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, relux_max_limit_); } diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index 84eff1bf..c8507433 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -35,9 +35,9 @@ void BiasAddFunctor::operator()( kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input->buffer()))); - kernel_.setArg(idx++, *(static_cast(bias->buffer()))); - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(input->opencl_image())); + kernel_.setArg(idx++, *(bias->opencl_image())); + kernel_.setArg(idx++, *(output->opencl_image())); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index bc906163..19be430f 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -77,7 +77,7 @@ void BufferToImageFunctor::operator()(Tensor *buffer, built_options); uint32_t idx = 0; - b2f_kernel.setArg(idx++, *(static_cast(buffer->buffer()))); + b2f_kernel.setArg(idx++, *(buffer->opencl_buffer())); if (!i2b_) { MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned"); b2f_kernel.setArg(idx++, static_cast(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype()))); @@ -93,8 +93,7 @@ void BufferToImageFunctor::operator()(Tensor *buffer, b2f_kernel.setArg(idx++, static_cast(buffer->dim(2))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(3))); } - b2f_kernel.setArg(idx++, *(static_cast(image->buffer()))); - + b2f_kernel.setArg(idx++, *(image->opencl_image())); const std::vector lws = {16, 64}; cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 686e3a7a..48466e6a 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel, *kernel = runtime->BuildKernel("concat", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, *(static_cast(input0->buffer()))); - kernel->setArg(idx++, *(static_cast(input1->buffer()))); + kernel->setArg(idx++, *(static_cast(input0->opencl_image()))); + kernel->setArg(idx++, *(static_cast(input1->opencl_image()))); kernel->setArg(idx++, static_cast(input0->dim(3))); - kernel->setArg(idx++, *(static_cast(output->buffer()))); + kernel->setArg(idx++, *(static_cast(output->opencl_image()))); } const uint32_t gws[3] = { @@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel, for (int i = 0; i < inputs_count; ++i) { const Tensor *input = input_list[i]; uint32_t idx = 0; - kernel->setArg(idx++, *(static_cast(input->buffer()))); + kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, static_cast(chan_blk_offset)); - kernel->setArg(idx++, *(static_cast(output->buffer()))); + kernel->setArg(idx++, *(output->opencl_image())); index_t input_channel_blk = input->dim(3) / 4; chan_blk_offset += input_channel_blk; diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index bee0e12a..b370b32b 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, uint32_t idx = 0; kernel->setArg(idx++, - *(static_cast(input->buffer()))); + *(input->opencl_image())); kernel->setArg(idx++, - *(static_cast(filter->buffer()))); + *(filter->opencl_image())); if (bias != nullptr) { kernel->setArg(idx++, - *(static_cast(bias->buffer()))); + *(bias->opencl_image())); } kernel->setArg(idx++, - *(static_cast(output->buffer()))); + *(output->opencl_image())); // FIXME handle flexable data type: half not supported kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input_height)); diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index bb677177..a7eb668d 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, uint32_t idx = 0; kernel->setArg(idx++, - *(static_cast(input->buffer()))); + *(input->opencl_image())); kernel->setArg(idx++, - *(static_cast(filter->buffer()))); + *(filter->opencl_image())); if (bias != nullptr) { kernel->setArg(idx++, - *(static_cast(bias->buffer()))); + *(bias->opencl_image())); } kernel->setArg(idx++, - *(static_cast(output->buffer()))); + *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index af344c28..5f3ffa5e 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel, uint32_t idx = 0; kernel->setArg(idx++, - *(static_cast(input->buffer()))); + *(input->opencl_image())); kernel->setArg(idx++, - *(static_cast(filter->buffer()))); + *(filter->opencl_image())); if (bias != nullptr) { kernel->setArg(idx++, - *(static_cast(bias->buffer()))); + *(bias->opencl_image())); } kernel->setArg(idx++, - *(static_cast(output->buffer()))); + *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 2942c5d0..3bbd4f43 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel, *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options); uint32_t idx = 0; - kernel->setArg(idx++, - *(static_cast(input->buffer()))); + kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg( - idx++, *(static_cast(filter->buffer()))); + idx++, *(filter->opencl_image())); if (bias != nullptr) { kernel->setArg( - idx++, *(static_cast(bias->buffer()))); + idx++, *(bias->opencl_image())); } kernel->setArg( - idx++, *(static_cast(output->buffer()))); + idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, static_cast(input_height)); kernel->setArg(idx++, static_cast(input_width)); diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index e49a36b2..8c589c2f 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -39,14 +39,14 @@ void EltwiseFunctor::operator()(const Tensor *input0, uint32_t idx = 0; kernel_.setArg(idx++, - *(static_cast(input0->buffer()))); + *(input0->opencl_image())); kernel_.setArg(idx++, - *(static_cast(input1->buffer()))); + *(input1->opencl_image())); if (!coeff_.empty()) { kernel_.setArg(idx++, coeff_[0]); kernel_.setArg(idx++, coeff_[1]); } - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(output->opencl_image())); } const uint32_t gws[2] = { diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index 33e26eca..4a4eacc1 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -61,15 +61,15 @@ void FullyConnectedFunctor::operator()( uint32_t idx = 0; kernel_.setArg(idx++, - *(static_cast(input->buffer()))); + *(input->opencl_image())); kernel_.setArg(idx++, - *(static_cast(weight->buffer()))); + *(weight->opencl_image())); if (bias != nullptr) { kernel_.setArg(idx++, - *(static_cast(bias->buffer()))); + *(bias->opencl_image())); } kernel_.setArg(idx++, - *(static_cast(output->buffer()))); + *(output->opencl_image())); kernel_.setArg(idx++, static_cast(input->dim(1))); kernel_.setArg(idx++, static_cast(input->dim(2))); kernel_.setArg(idx++, static_cast(input->dim(3))); diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index c7f61849..77560853 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -40,11 +40,10 @@ void MatMulFunctor::operator()( kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options); uint32_t idx = 0; + kernel_.setArg(idx++, *(A->opencl_image())); kernel_.setArg(idx++, - *(static_cast(A->buffer()))); - kernel_.setArg(idx++, - *(static_cast(B->buffer()))); - kernel_.setArg(idx++, *(static_cast(C->buffer()))); + *(B->opencl_image())); + kernel_.setArg(idx++, *(C->opencl_image())); kernel_.setArg(idx++, static_cast(height)); kernel_.setArg(idx++, static_cast(width)); kernel_.setArg(idx++, static_cast(A->dim(2))); diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 2ec0e084..1272a4fb 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -65,7 +65,7 @@ void PoolingFunctor::operator()(const Tensor *input, kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input->buffer()))); + kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, static_cast(input->dim(1))); kernel_.setArg(idx++, static_cast(input->dim(2))); kernel_.setArg(idx++, static_cast(out_height)); @@ -73,7 +73,7 @@ void PoolingFunctor::operator()(const Tensor *input, kernel_.setArg(idx++, paddings[1] / 2); kernel_.setArg(idx++, strides_[0]); kernel_.setArg(idx++, kernels_[0]); - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(output->opencl_image())); } const uint32_t gws[3] = { diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index d8f4185e..5761d3cb 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -48,8 +48,8 @@ void ResizeBilinearFunctor::operator()( kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input->buffer()))); - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(input->opencl_image())); + kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, height_scale); kernel_.setArg(idx++, width_scale); kernel_.setArg(idx++, static_cast(in_height)); diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index 55a48775..a3336aa6 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -35,10 +35,10 @@ void SoftmaxFunctor::operator()(const Tensor *logits, kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(logits->buffer()))); + kernel_.setArg(idx++, *(logits->opencl_image())); kernel_.setArg(idx++, static_cast(channels)); kernel_.setArg(idx++, remain_channels); - kernel_.setArg(idx++, *(static_cast(output->buffer()))); + kernel_.setArg(idx++, *(output->opencl_image())); } const uint32_t gws[3] = {static_cast(channel_blocks), static_cast(width), diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc index 5940f4d3..2eb06027 100644 --- a/mace/kernels/opencl/space_to_batch_opencl.cc +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -42,11 +42,11 @@ void SpaceToBatchFunctor::operator()(Tensor *space_tensor uint32_t idx = 0; if (b2s_) { - kernel_.setArg(idx++, *(static_cast(batch_tensor->buffer()))); - kernel_.setArg(idx++, *(static_cast(space_tensor->buffer()))); + kernel_.setArg(idx++, *(batch_tensor->opencl_image())); + kernel_.setArg(idx++, *(space_tensor->opencl_image())); } else { - kernel_.setArg(idx++, *(static_cast(space_tensor->buffer()))); - kernel_.setArg(idx++, *(static_cast(batch_tensor->buffer()))); + kernel_.setArg(idx++, *(space_tensor->opencl_image())); + kernel_.setArg(idx++, *(batch_tensor->opencl_image())); } kernel_.setArg(idx++, block_shape_[0]); kernel_.setArg(idx++, block_shape_[1]); diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index 54511220..8fd17f21 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -49,8 +49,8 @@ void WinogradTransformFunctor::operator()(const Tensor *i built_options); uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input_tensor->buffer()))); - kernel_.setArg(idx++, *(static_cast(output_tensor->buffer()))); + kernel_.setArg(idx++, *(input_tensor->opencl_image())); + kernel_.setArg(idx++, *(output_tensor->opencl_image())); kernel_.setArg(idx++, static_cast(input_tensor->dim(1))); kernel_.setArg(idx++, static_cast(input_tensor->dim(2))); kernel_.setArg(idx++, static_cast(input_tensor->dim(3))); @@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor::operator()(const Te const uint32_t round_h = (height_ + 1) / 2; const uint32_t round_w = (width_ + 1) / 2; uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input_tensor->buffer()))); + kernel_.setArg(idx++, *(static_cast(input_tensor->opencl_image()))); if (bias != nullptr) { - kernel_.setArg(idx++, *(static_cast(bias->buffer()))); + kernel_.setArg(idx++, *(static_cast(bias->opencl_image()))); } - kernel_.setArg(idx++, *(static_cast(output_tensor->buffer()))); + kernel_.setArg(idx++, *(static_cast(output_tensor->opencl_image()))); kernel_.setArg(idx++, static_cast(output_shape[1])); kernel_.setArg(idx++, static_cast(output_shape[2])); kernel_.setArg(idx++, static_cast(round_h * round_w)); -- GitLab