diff --git a/mace/core/buffer.h b/mace/core/buffer.h
index bada99c06a997f804399d0d726629b88d4743e66..c17c4a1d9fcf83d8a6c8bfa7ead4fd3b3d5fe6b5 100644
--- a/mace/core/buffer.h
+++ b/mace/core/buffer.h
@@ -241,7 +241,9 @@ class Image : public BufferBase {
     mapped_buf_ = nullptr;
   };

-  void Resize(index_t size) {}
+  void Resize(index_t size) {
+    MACE_NOT_IMPLEMENTED;
+  }

   void Copy(void *src, index_t offset, index_t length) {
     MACE_NOT_IMPLEMENTED;
@@ -263,7 +265,11 @@ class Image : public BufferBase {

 class BufferSlice : public BufferBase {
  public:
-  BufferSlice() {}
+  BufferSlice()
+    : buffer_(nullptr),
+      mapped_buf_(nullptr),
+      offset_(0),
+      length_(0) {}
   BufferSlice(BufferBase *buffer, index_t offset, index_t length)
     : BufferBase(buffer->size()),
       buffer_(buffer),
@@ -284,12 +290,13 @@ class BufferSlice : public BufferBase {
                 other.length_) {}

   ~BufferSlice() {
-    if (mapped_buf_ != nullptr) {
+    if (buffer_ != nullptr && mapped_buf_ != nullptr) {
       UnMap();
     }
   }

   void *buffer() {
+    MACE_CHECK_NOTNULL(buffer_);
     return buffer_->buffer();
   };

@@ -330,6 +337,7 @@ class BufferSlice : public BufferBase {
   };

   void Resize(index_t size) {
+    MACE_NOT_IMPLEMENTED;
   }

   void Copy(void *src, index_t offset, index_t length) {
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index cfe832ed234a822e7713fc59e7531f8faa3f27e4..47fa3d11387c258e8cc96d55b7a9cca68a94f9e0 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -5,6 +5,7 @@
 #ifndef MACE_CORE_TENSOR_H_
 #define MACE_CORE_TENSOR_H_

+#include "mace/core/runtime/opencl/cl2.hpp"
 #include "mace/core/buffer.h"
 #include "mace/utils/logging.h"
 #include "mace/core/types.h"
@@ -112,10 +113,24 @@ class Tensor {
     return size() * SizeOfType();
   }

-  inline void *buffer() const {
-    MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr,
-               "buffer is null");
-    return buffer_->buffer();
+  inline bool has_opencl_image() const {
+    return buffer_ != nullptr && !buffer_->OnHost()
+        && typeid(*buffer_) == typeid(Image);
+  }
+
+  inline bool has_opencl_buffer() const {
+    return buffer_ != nullptr && !buffer_->OnHost()
+        && !has_opencl_image();
+  }
+
+  inline cl::Image *opencl_image() const {
+    MACE_CHECK(has_opencl_image(), "do not have image");
+    return static_cast<cl::Image *>(buffer_->buffer());
+  }
+
+  inline cl::Buffer *opencl_buffer() const {
+    MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer");
+    return static_cast<cl::Buffer *>(buffer_->buffer());
   }

   inline index_t buffer_offset() const {
@@ -152,6 +167,7 @@
   inline void Resize(const std::vector<index_t> &shape) {
     shape_ = shape;
     if (buffer_ != nullptr) {
+      MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
       buffer_->Resize(raw_size());
     } else {
       buffer_ = new Buffer(allocator_, raw_size());
@@ -159,20 +175,38 @@
-  inline void ResizeLike(const Tensor &other) {
-    Resize(other.shape());
-  }
-
-  inline void ResizeLike(const Tensor *other) {
-    Resize(other->shape());
-  }
-
   inline void ResizeImage(const std::vector<index_t> &shape,
                           const std::vector<size_t> &image_shape) {
     shape_ = shape;
     if (buffer_ == nullptr) {
       buffer_ = new Image(image_shape, dtype_);
       is_buffer_owner_ = true;
+    } else {
+      MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize.");
+      Image *image = dynamic_cast<Image *>(buffer_);
+      MACE_CHECK(image_shape[0] <= image->image_shape()[0]
+                     && image_shape[1] <= image->image_shape()[1]);
+    }
+  }
+
+  inline void ResizeLike(const Tensor &other) {
+    ResizeLike(&other);
+  }
+
+  inline void ResizeLike(const Tensor *other) {
+    if (other->has_opencl_image()) {
+      if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) {
+        delete buffer_;
+        buffer_ = nullptr;
+      }
+      ResizeImage(other->shape(),
+                  dynamic_cast<Image *>(other->UnderlyingBuffer())->image_shape());
+    } else {
+      if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) {
+        delete buffer_;
+        buffer_ = nullptr;
+      }
+      Resize(other->shape());
     }
   }
diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc
index dee010875e853ba192cf05b90abfcdf5ee2cb48f..99b8a6bc80bed3d92c8649155b7aca1210cbd0a7 100644
--- a/mace/kernels/opencl/activation_opencl.cc
+++ b/mace/kernels/opencl/activation_opencl.cc
@@ -60,12 +60,12 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_ = runtime->BuildKernel("activation", kernel_name, built_options);

     int idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
     if (activation_ == PRELU) {
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(alpha->buffer())));
+      kernel_.setArg(idx++, *(alpha->opencl_image()));
     }
     kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }

   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc
index 3495ddca6f4d097e7ff5252c433dbd3f7e08e2f2..3838808192420e8ade3127932c8db626aba8fbf0 100644
--- a/mace/kernels/opencl/addn.cc
+++ b/mace/kernels/opencl/addn.cc
@@ -58,9 +58,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
     uint32_t idx = 0;
     for (auto input : input_tensors) {
       kernel_.setArg(idx++,
-                     *(static_cast<cl::Image2D *>(input->buffer())));
+                     *(input->opencl_image()));
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(output_tensor->opencl_image()));
   }

   const uint32_t gws[2] = {
diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc
index 7696e875e538b0f06aefb4e30c4032fcba56a538..571bdd533e4051f841cef7efae702645023457f5 100644
--- a/mace/kernels/opencl/batch_norm_opencl.cc
+++ b/mace/kernels/opencl/batch_norm_opencl.cc
@@ -64,17 +64,17 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
         runtime->BuildKernel("batch_norm", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(scale->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(scale->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(offset->buffer())));
+                   *(offset->opencl_image()));
     if (!folded_constant_) {
       kernel_.setArg(idx++,
-                     *(static_cast<cl::Image2D *>(mean->buffer())));
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(var->buffer())));
+                     *(mean->opencl_image()));
+      kernel_.setArg(idx++, *(var->opencl_image()));
       kernel_.setArg(idx++, epsilon);
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
     kernel_.setArg(idx++, relux_max_limit_);
   }

diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc
index 84eff1bfabcaad80d913fdd1aa0a73279883e4ad..c8507433ca804150df5f4d4c3277b52ebdaddd1c 100644
--- a/mace/kernels/opencl/bias_add_opencl.cc
+++ b/mace/kernels/opencl/bias_add_opencl.cc
@@ -35,9 +35,9 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(bias->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(bias->opencl_image()));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }

   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc
index bc906163b7913db55065a58b3938316f2c3a490a..19be430f8d47cf6b2ef2c0a2fd28b8856f911a18 100644
--- a/mace/kernels/opencl/buffer_to_image.cc
+++ b/mace/kernels/opencl/buffer_to_image.cc
@@ -77,7 +77,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
                                              built_options);

   uint32_t idx = 0;
-  b2f_kernel.setArg(idx++, *(static_cast<cl::Buffer *>(buffer->buffer())));
+  b2f_kernel.setArg(idx++, *(buffer->opencl_buffer()));
   if (!i2b_) {
     MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned");
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype())));
@@ -93,8 +93,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
     b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
   }
-  b2f_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(image->buffer())));
-
+  b2f_kernel.setArg(idx++, *(image->opencl_image()));
   const std::vector<uint32_t> lws = {16, 64};
   cl::Event event;
   cl_int error = runtime->command_queue().enqueueNDRangeKernel(
diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc
index 686e3a7add8d6c5d5ee73b892941a463aa1753b1..48466e6afaaf908dd8f1fbccbaa49fcf475aa26d 100644
--- a/mace/kernels/opencl/concat.cc
+++ b/mace/kernels/opencl/concat.cc
@@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel,
     *kernel = runtime->BuildKernel("concat", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(input0->buffer())));
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(input1->buffer())));
+    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(input0->opencl_image())));
+    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(input1->opencl_image())));
     kernel->setArg(idx++, static_cast<int>(input0->dim(3)));
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->opencl_image())));
   }

   const uint32_t gws[3] = {
@@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel,
   for (int i = 0; i < inputs_count; ++i) {
     const Tensor *input = input_list[i];
     uint32_t idx = 0;
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
+    kernel->setArg(idx++, *(input->opencl_image()));
     kernel->setArg(idx++, static_cast<int>(chan_blk_offset));
-    kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel->setArg(idx++, *(output->opencl_image()));

     index_t input_channel_blk = input->dim(3) / 4;
     chan_blk_offset += input_channel_blk;
diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
index bee0e12a8826c2cd4d7bbe28ba3e70c9fe42f259..b370b32bebf84d938a7d0f8482ecdaba98e498c5 100644
--- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
@@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,

     uint32_t idx = 0;
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input->buffer())));
+                   *(input->opencl_image()));
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(filter->buffer())));
+                   *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                     *(static_cast<cl::Image2D *>(bias->buffer())));
+                     *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(output->buffer())));
+                   *(output->opencl_image()));
     // FIXME handle flexable data type: half not supported
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input_height));
diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
index bb67717791fd05f820ad92f734af545fe2b99e1e..a7eb668ddf093c46112396ad45c4aa32700fea58 100644
--- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
@@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,

     uint32_t idx = 0;
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input->buffer())));
+                   *(input->opencl_image()));
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(filter->buffer())));
+                   *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                     *(static_cast<cl::Image2D *>(bias->buffer())));
+                     *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(output->buffer())));
+                   *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input->dim(1)));
     kernel->setArg(idx++, static_cast<int>(input->dim(2)));
diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc
index af344c284fe04836d1d2ac23b4014ffdf76ac22b..5f3ffa5e90e291e9ccf4aace429e19b72ee430ce 100644
--- a/mace/kernels/opencl/conv_2d_opencl_general.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_general.cc
@@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel,

     uint32_t idx = 0;
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input->buffer())));
+                   *(input->opencl_image()));
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(filter->buffer())));
+                   *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(idx++,
-                     *(static_cast<cl::Image2D *>(bias->buffer())));
+                     *(bias->opencl_image()));
     }
     kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(output->buffer())));
+                   *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input->dim(1)));
     kernel->setArg(idx++, static_cast<int>(input->dim(2)));
diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc
index 2942c5d060b9a240c0e9c3aa47cf6e2a82a6fdfd..3bbd4f438ce00567adebd450a4101037dd69a297 100644
--- a/mace/kernels/opencl/depthwise_conv_opencl.cc
+++ b/mace/kernels/opencl/depthwise_conv_opencl.cc
@@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel,
     *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel->setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input->buffer())));
+    kernel->setArg(idx++, *(input->opencl_image()));
     kernel->setArg(
-        idx++, *(static_cast<cl::Image2D *>(filter->buffer())));
+        idx++, *(filter->opencl_image()));
     if (bias != nullptr) {
       kernel->setArg(
-          idx++, *(static_cast<cl::Image2D *>(bias->buffer())));
+          idx++, *(bias->opencl_image()));
     }
     kernel->setArg(
-        idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+        idx++, *(output->opencl_image()));
     kernel->setArg(idx++, relux_max_limit);
     kernel->setArg(idx++, static_cast<int>(input_height));
     kernel->setArg(idx++, static_cast<int>(input_width));
diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc
index e49a36b242688757b7df33f9bec74746b771b003..8c589c2f16a8ed0c3e030c9e2b6d67ba02975298 100644
--- a/mace/kernels/opencl/eltwise_opencl.cc
+++ b/mace/kernels/opencl/eltwise_opencl.cc
@@ -39,14 +39,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,

     uint32_t idx = 0;
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input0->buffer())));
+                   *(input0->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input1->buffer())));
+                   *(input1->opencl_image()));
     if (!coeff_.empty()) {
       kernel_.setArg(idx++, coeff_[0]);
       kernel_.setArg(idx++, coeff_[1]);
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }

   const uint32_t gws[2] = {
diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc
index 33e26ecab3a668cacf77ae7a16bcd61f13d87aa9..4a4eacc15d0f5d9fe8d1635483ffaaa35fa37ae1 100644
--- a/mace/kernels/opencl/fully_connected_opencl.cc
+++ b/mace/kernels/opencl/fully_connected_opencl.cc
@@ -61,15 +61,15 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(

     uint32_t idx = 0;
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(input->buffer())));
+                   *(input->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(weight->buffer())));
+                   *(weight->opencl_image()));
     if (bias != nullptr) {
       kernel_.setArg(idx++,
-                     *(static_cast<cl::Image2D *>(bias->buffer())));
+                     *(bias->opencl_image()));
     }
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(output->buffer())));
+                   *(output->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(input->dim(1)));
     kernel_.setArg(idx++, static_cast<int>(input->dim(2)));
     kernel_.setArg(idx++, static_cast<int>(input->dim(3)));
diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc
index c7f618496392bc3ff63905507ba7cbe416f38d0f..775608537a79107ff8d3d36221ac506f8f8c3b16 100644
--- a/mace/kernels/opencl/matmul.cc
+++ b/mace/kernels/opencl/matmul.cc
@@ -40,11 +40,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options);

     uint32_t idx = 0;
+    kernel_.setArg(idx++, *(A->opencl_image()));
     kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(A->buffer())));
-    kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(B->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(C->buffer())));
+                   *(B->opencl_image()));
+    kernel_.setArg(idx++, *(C->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(height));
     kernel_.setArg(idx++, static_cast<int>(width));
     kernel_.setArg(idx++, static_cast<int>(A->dim(2)));
diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc
index 2ec0e0845982ac32cb041203454342411f846e9f..1272a4fbfe716c9a2cc1b33ff6314c26fbb79630 100644
--- a/mace/kernels/opencl/pooling_opencl.cc
+++ b/mace/kernels/opencl/pooling_opencl.cc
@@ -65,7 +65,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(input->dim(1)));
     kernel_.setArg(idx++, static_cast<int>(input->dim(2)));
     kernel_.setArg(idx++, static_cast<int>(out_height));
@@ -73,7 +73,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     kernel_.setArg(idx++, paddings[1] / 2);
     kernel_.setArg(idx++, strides_[0]);
     kernel_.setArg(idx++, kernels_[0]);
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }

   const uint32_t gws[3] = {
diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc
index d8f4185ee91259604fb2c3a2e153202556f94695..5761d3cbb1f8b718947d4c3ae96c6f7f57e75d35 100644
--- a/mace/kernels/opencl/resize_bilinear_opencl.cc
+++ b/mace/kernels/opencl/resize_bilinear_opencl.cc
@@ -48,8 +48,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
     kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
+    kernel_.setArg(idx++, *(output->opencl_image()));
     kernel_.setArg(idx++, height_scale);
     kernel_.setArg(idx++, width_scale);
     kernel_.setArg(idx++, static_cast<int>(in_height));
diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc
index 55a487757ebcc399d61813db5935259454dfd935..a3336aa6f721b51178d5ed136b81fe45c342dda0 100644
--- a/mace/kernels/opencl/softmax_opencl.cc
+++ b/mace/kernels/opencl/softmax_opencl.cc
@@ -35,10 +35,10 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
     kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(logits->buffer())));
+    kernel_.setArg(idx++, *(logits->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(channels));
     kernel_.setArg(idx++, remain_channels);
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(output->opencl_image()));
   }
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
                            static_cast<uint32_t>(width),
diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc
index 5940f4d3fdc8996b87765977db3a7120a86abb09..2eb06027a83ed668795329cd525c8de5d7ba2668 100644
--- a/mace/kernels/opencl/space_to_batch_opencl.cc
+++ b/mace/kernels/opencl/space_to_batch_opencl.cc
@@ -42,11 +42,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor

     uint32_t idx = 0;
     if (b2s_) {
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(batch_tensor->buffer())));
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(space_tensor->buffer())));
+      kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
+      kernel_.setArg(idx++, *(space_tensor->opencl_image()));
     } else {
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(space_tensor->buffer())));
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(batch_tensor->buffer())));
+      kernel_.setArg(idx++, *(space_tensor->opencl_image()));
+      kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
     }
     kernel_.setArg(idx++, block_shape_[0]);
     kernel_.setArg(idx++, block_shape_[1]);
diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc
index 54511220fdc4ce1cec32f8e2a38f0fbf38b35519..8fd17f215e587b302cd7c90763cb655433a62788 100644
--- a/mace/kernels/opencl/winograd_transform.cc
+++ b/mace/kernels/opencl/winograd_transform.cc
@@ -49,8 +49,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
                                      built_options);

     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input_tensor->buffer())));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(input_tensor->opencl_image()));
+    kernel_.setArg(idx++, *(output_tensor->opencl_image()));
     kernel_.setArg(idx++, static_cast<int>(input_tensor->dim(1)));
     kernel_.setArg(idx++, static_cast<int>(input_tensor->dim(2)));
     kernel_.setArg(idx++, static_cast<int>(input_tensor->dim(3)));
@@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
     const uint32_t round_h = (height_ + 1) / 2;
     const uint32_t round_w = (width_ + 1) / 2;
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input_tensor->buffer())));
+    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(input_tensor->opencl_image())));
     if (bias != nullptr) {
-      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(bias->buffer())));
+      kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(bias->opencl_image())));
     }
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
+    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->opencl_image())));
     kernel_.setArg(idx++, static_cast<int>(output_shape[1]));
     kernel_.setArg(idx++, static_cast<int>(output_shape[2]));
     kernel_.setArg(idx++, static_cast<int>(round_h * round_w));
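
Reviewer note (illustrative sketch, not part of the patch): the snippet below shows how the new Tensor::has_opencl_image(), opencl_image() and opencl_buffer() accessors introduced in mace/core/tensor.h are intended to replace the raw static_cast on Tensor::buffer() when binding OpenCL kernel arguments. The helper name SetTensorArg and its signature are hypothetical and exist only for this example; it assumes, as the kernels above already do, that cl2.hpp's cl::Kernel::setArg accepts a cl::Image or cl::Buffer passed by reference.

#include "mace/core/runtime/opencl/cl2.hpp"  // cl::Kernel, cl::Image, cl::Buffer
#include "mace/core/tensor.h"                // Tensor with the new accessors

namespace mace {

// Hypothetical helper: bind a tensor to the next kernel argument, letting the
// accessor (which MACE_CHECKs the backing type) select image vs. buffer storage.
inline void SetTensorArg(cl::Kernel *kernel, uint32_t *idx, const Tensor *tensor) {
  if (tensor->has_opencl_image()) {
    // Image-backed GPU tensor: pass the cl::Image object directly.
    kernel->setArg((*idx)++, *(tensor->opencl_image()));
  } else {
    // Otherwise the tensor must be backed by a plain cl::Buffer.
    kernel->setArg((*idx)++, *(tensor->opencl_buffer()));
  }
}

}  // namespace mace

Compared with the old *(static_cast<cl::Image2D *>(x->buffer())) pattern, a mismatched backing type now fails loudly at the MACE_CHECK inside the accessor instead of silently reinterpreting the underlying cl_mem handle.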