From b92e9f9a40c45fd82fee4f03a40dbd0f0409fc4c Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Tue, 25 Feb 2020 10:28:32 +0800 Subject: [PATCH] Replace uint16_t with half_t. test=develop (#2996) --- lite/kernels/opencl/CMakeLists.txt | 4 +- .../opencl/activation_image_compute.cc | 22 ++--- .../opencl/activation_image_compute_test.cc | 16 ++- lite/kernels/opencl/concat_image_compute.cc | 9 +- .../opencl/concat_image_compute_test.cc | 6 +- lite/kernels/opencl/conv_image_compute.cc | 98 +++++++++---------- lite/kernels/opencl/conv_image_compute.h | 1 + .../kernels/opencl/conv_image_compute_test.cc | 90 +++++++++-------- .../depthwise_conv2d_image_compute_test.cc | 45 +++++---- .../opencl/elementwise_add_image_compute.cc | 8 +- .../opencl/elementwise_add_image_compute.h | 1 + .../elementwise_add_image_compute_test.cc | 18 ++-- .../opencl/elementwise_mul_image_compute.cc | 9 +- .../elementwise_mul_image_compute_test.cc | 18 ++-- ...lementwise_add_activation_image_compute.cc | 1 + lite/kernels/opencl/layout_compute.cc | 5 +- lite/kernels/opencl/layout_compute_test.cc | 2 +- .../opencl/nearest_interp_image_compute.cc | 14 +-- .../nearest_interp_image_compute_test.cc | 4 +- lite/kernels/opencl/pool_image_compute.cc | 5 +- .../kernels/opencl/pool_image_compute_test.cc | 13 ++- lite/kernels/opencl/reshape_image_compute.cc | 5 +- .../opencl/reshape_image_compute_test.cc | 14 +-- lite/kernels/opencl/scale_image_compute.cc | 5 +- .../opencl/scale_image_compute_test.cc | 10 +- 25 files changed, 213 insertions(+), 210 deletions(-) diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt index 6f7f558b73..f7d3fae878 100644 --- a/lite/kernels/opencl/CMakeLists.txt +++ b/lite/kernels/opencl/CMakeLists.txt @@ -131,8 +131,8 @@ lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) #lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc -# DEPS elementwise_add_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS elementwise_add_opencl op_registry program context +# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc DEPS io_copy_opencl op_registry program context diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc index 58eac6515f..eecbd56afb 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -43,9 +44,9 @@ class ReluComputeImageDefault : public KernelLite(); const auto& x_dims = param.X->dims(); - auto* x_buf = param.X->data(); + auto* x_buf = param.X->data(); auto image_shape = InitImageDimInfoWith(x_dims); - auto* out_buf = param.Out->mutable_data( + auto* out_buf = param.Out->mutable_data( image_shape["width"], image_shape["height"]); const auto& y_dims = param.Out->dims(); // useless: check dim only @@ -111,9 +112,9 @@ class Relu6ComputeImageDefault : public KernelLite(); const auto& x_dims = param.X->dims(); - auto* x_buf = param.X->data(); + auto* x_buf = param.X->data(); auto image_shape = InitImageDimInfoWith(x_dims); - auto* out_buf = param.Out->mutable_data( + auto* out_buf = param.Out->mutable_data( image_shape["width"], image_shape["height"]); const auto& y_dims = param.Out->dims(); // useless: check dim only auto threshold = param.Relu_clipped_coef; @@ -185,14 +186,13 @@ class SigmoidComputeImageDefault auto& param = *param_.get_mutable(); const auto& x_dims = param.X->dims(); auto* x_buf = - param.X->data(); // use uint16_t represents half float + param.X->data(); // use half_t represents half float auto image_shape = InitImageDimInfoWith(x_dims); - auto* out_buf = - param.Out->mutable_data( // use uint16_t - // represents half float - image_shape["width"], - image_shape["height"]); + auto* out_buf = param.Out->mutable_data( // use half_t + // represents half float + image_shape["width"], + image_shape["height"]); const auto& y_dims = param.Out->dims(); // useless: check dim only auto& context = ctx_->As(); diff --git a/lite/kernels/opencl/activation_image_compute_test.cc b/lite/kernels/opencl/activation_image_compute_test.cc index fd2b3d68eb..09f48eb86f 100644 --- a/lite/kernels/opencl/activation_image_compute_test.cc +++ b/lite/kernels/opencl/activation_image_compute_test.cc @@ -133,9 +133,9 @@ TEST(relu_image2d_fp16, compute) { mapped_x[i] = static_cast(i) - x_dim.production() / 2; mapped_y[i] = static_cast(0); } - auto *relu_in_data = relu_in.mutable_data( + auto *relu_in_data = relu_in.mutable_data( relu_image2d_shape["width"], relu_image2d_shape["height"]); - auto *relu_out_data = relu_out.mutable_data( + auto *relu_out_data = relu_out.mutable_data( relu_image2d_shape["width"], relu_image2d_shape["height"]); // set context and kernel args @@ -290,9 +290,9 @@ TEST(relu6_image2d_fp16, compute) { mapped_x[i] = static_cast(i) - x_dim.production() / 2; mapped_y[i] = static_cast(0); } - auto *relu_in_data = relu_in.mutable_data( + auto *relu_in_data = relu_in.mutable_data( relu_image2d_shape["width"], relu_image2d_shape["height"]); - auto *relu_out_data = relu_out.mutable_data( + auto *relu_out_data = relu_out.mutable_data( relu_image2d_shape["width"], relu_image2d_shape["height"]); // set context and kernel args @@ -447,12 +447,10 @@ TEST(sigmoid_image2d_fp16, compute) { for (int i = 0; i < x_dim.production(); ++i) { mapped_x[i] = static_cast(dist(engine)); } - auto *sigmoid_in_data = - sigmoid_in.mutable_data( - sigmoid_image2d_shape["width"], - sigmoid_image2d_shape["height"]); + auto *sigmoid_in_data = sigmoid_in.mutable_data( + sigmoid_image2d_shape["width"], sigmoid_image2d_shape["height"]); auto *sigmoid_out_data = - sigmoid_out.mutable_data( + sigmoid_out.mutable_data( sigmoid_image2d_shape["width"], sigmoid_image2d_shape["height"]); diff --git a/lite/kernels/opencl/concat_image_compute.cc b/lite/kernels/opencl/concat_image_compute.cc index 289e739289..254f812845 100644 --- a/lite/kernels/opencl/concat_image_compute.cc +++ b/lite/kernels/opencl/concat_image_compute.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -80,7 +81,7 @@ class ConcatComputeImage : public KernelLite(); const auto& x_dims = param.output->dims(); auto image_shape = InitImageDimInfoWith(x_dims); - auto* out_buf = param.output->mutable_data( + auto* out_buf = param.output->mutable_data( image_shape["width"], image_shape["height"]); const auto& y_dims = param.output->dims(); // useless: check dim only @@ -124,8 +125,8 @@ class ConcatComputeImage : public KernelLitedata(); - auto* x_buf1 = inputs[1]->data(); + auto* x_buf0 = inputs[0]->data(); + auto* x_buf1 = inputs[1]->data(); cl_int status = kernel.setArg(arg_idx, *x_buf0); CL_CHECK_FATAL(status); status = kernel.setArg(++arg_idx, *x_buf1); @@ -152,7 +153,7 @@ class ConcatComputeImage : public KernelLitedata(); + auto* x_buf = inputs[i]->data(); cl_int status = kernel.setArg(arg_idx, *x_buf); CL_CHECK_FATAL(status); status = kernel.setArg(++arg_idx, *out_buf); diff --git a/lite/kernels/opencl/concat_image_compute_test.cc b/lite/kernels/opencl/concat_image_compute_test.cc index eca2532e9a..43b37d9b74 100644 --- a/lite/kernels/opencl/concat_image_compute_test.cc +++ b/lite/kernels/opencl/concat_image_compute_test.cc @@ -192,15 +192,15 @@ TEST(concat_image2d, compute) { mapped_y[i] = static_cast(0); } auto *concat_in_data0 = - concat_in0.mutable_data( + concat_in0.mutable_data( concat_image2d_shape_in0["width"], concat_image2d_shape_in0["height"]); auto *concat_in_data1 = - concat_in1.mutable_data( + concat_in1.mutable_data( concat_image2d_shape_in1["width"], concat_image2d_shape_in1["height"]); auto *concat_out_data = - concat_out.mutable_data( + concat_out.mutable_data( concat_image2d_shape["width"], concat_image2d_shape["height"]); diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc index 6cb0a54d20..1a718c2866 100644 --- a/lite/kernels/opencl/conv_image_compute.cc +++ b/lite/kernels/opencl/conv_image_compute.cc @@ -85,10 +85,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterNWBlock converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - filter_gpu_image_.mutable_data( + filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); impl_ = &ConvImageCompute::Conv2d1x1; @@ -109,10 +109,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterNWBlock converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - filter_gpu_image_.mutable_data( + filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] && kernel_h != 3) { @@ -123,10 +123,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterNWBlock converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - filter_gpu_image_.mutable_data( + filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); impl_ = &ConvImageCompute::DepthwiseConv2d; @@ -137,10 +137,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterFolder converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - filter_gpu_image_.mutable_data( + filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); impl_ = &ConvImageCompute::Conv2d3x3; @@ -151,10 +151,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterFolder converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - filter_gpu_image_.mutable_data( + filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); impl_ = &ConvImageCompute::Conv2d5x5; @@ -165,10 +165,10 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterFolder converter; const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims); - std::vector filter_image_v(filter_image_dims[0] * - filter_image_dims[1] * 4); // 4 : RGBA + std::vector filter_image_v(filter_image_dims[0] * + filter_image_dims[1] * 4); // 4 : RGBA converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); - this->filter_gpu_image_.mutable_data( + this->filter_gpu_image_.mutable_data( filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); impl_ = &ConvImageCompute::Conv2d7x7; @@ -200,12 +200,12 @@ void ConvImageCompute::PrepareForRun() { CLImageConverterFolder bias_converter; const DDim& bias_image_dims = bias_converter.InitImageDimInfoWith(param.bias->dims()); - std::vector bias_image_v(bias_image_dims[0] * bias_image_dims[1] * - 4); + std::vector bias_image_v(bias_image_dims[0] * bias_image_dims[1] * + 4); float* bias_cpu_data = param.bias->mutable_data(); bias_converter.NCHWToImage( bias_cpu_data, bias_image_v.data(), param.bias->dims()); - this->bias_gpu_image_.mutable_data( + this->bias_gpu_image_.mutable_data( bias_image_dims[0], bias_image_dims[1], bias_image_v.data()); // convert cpu buffer bias --> gpu image --- end ---- } @@ -223,8 +223,8 @@ void ConvImageCompute::Conv2d1x1() { auto input_dims = param.x->dims(); auto paddings = *param.paddings; auto strides = param.strides; - auto* input_image = param.x->data(); - auto* filter_image = filter_gpu_image_.data(); + auto* input_image = param.x->data(); + auto* filter_image = filter_gpu_image_.data(); auto filter_dims = param.filter->dims(); auto output_dims = param.output->dims(); @@ -233,7 +233,7 @@ void ConvImageCompute::Conv2d1x1() { int output_width = output_dims[3]; int output_height = output_dims[2]; auto out_image_shape = InitImageDimInfoWith(output_dims); - auto* out_image = param.output->mutable_data( + auto* out_image = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); const bool has_bias = param.bias != nullptr; @@ -292,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() { const cl::Buffer* bias_buf = nullptr; const cl::Image2D* bias_image = nullptr; if (has_bias) { - bias_image = bias_gpu_image_.data(); + bias_image = bias_gpu_image_.data(); } auto& context = ctx_->As(); @@ -373,8 +373,8 @@ void ConvImageCompute::Conv2d3x3() { auto paddings = *param.paddings; auto strides = param.strides; - auto* input_image = param.x->data(); - auto* filter_image = filter_gpu_image_.data(); + auto* input_image = param.x->data(); + auto* filter_image = filter_gpu_image_.data(); auto filter_dims = param.filter->dims(); auto output_dims = param.output->dims(); @@ -388,7 +388,7 @@ void ConvImageCompute::Conv2d3x3() { int filter_height = filter_dims[2]; int filter_channel = filter_dims[1]; auto out_image_shape = InitImageDimInfoWith(output_dims); - auto* out_image = param.output->mutable_data( + auto* out_image = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); const bool has_bias = param.bias != nullptr; @@ -464,7 +464,7 @@ void ConvImageCompute::Conv2d3x3() { const cl::Image2D* bias_image = nullptr; if (has_bias) { - bias_image = bias_gpu_image_.data(); + bias_image = bias_gpu_image_.data(); } auto& context = ctx_->As(); @@ -549,8 +549,8 @@ void ConvImageCompute::Conv2d5x5() { auto input_dims = param.x->dims(); auto paddings = *param.paddings; auto strides = param.strides; - auto* input_image = param.x->data(); - auto* filter_image = filter_gpu_image_.data(); + auto* input_image = param.x->data(); + auto* filter_image = filter_gpu_image_.data(); auto filter_dims = param.filter->dims(); auto output_dims = param.output->dims(); @@ -561,7 +561,7 @@ void ConvImageCompute::Conv2d5x5() { int filter_width = filter_dims[3]; int filter_height = filter_dims[2]; auto out_image_shape = InitImageDimInfoWith(output_dims); - auto* out_image = param.output->mutable_data( + auto* out_image = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); const bool has_bias = param.bias != nullptr; @@ -619,7 +619,7 @@ void ConvImageCompute::Conv2d5x5() { const cl::Image2D* bias_image = nullptr; if (has_bias) { - bias_image = bias_gpu_image_.data(); + bias_image = bias_gpu_image_.data(); } auto& context = ctx_->As(); @@ -694,8 +694,8 @@ void ConvImageCompute::Conv2d7x7() { auto input_dims = param.x->dims(); auto paddings = *param.paddings; auto strides = param.strides; - auto* input_image = param.x->data(); - auto* filter_image = filter_gpu_image_.data(); + auto* input_image = param.x->data(); + auto* filter_image = filter_gpu_image_.data(); auto filter_dims = param.filter->dims(); auto output_dims = param.output->dims(); @@ -706,7 +706,7 @@ void ConvImageCompute::Conv2d7x7() { int filter_width = filter_dims[3]; int filter_height = filter_dims[2]; auto out_image_shape = InitImageDimInfoWith(output_dims); - auto* out_image = param.output->mutable_data( + auto* out_image = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); const bool has_bias = param.bias != nullptr; @@ -764,7 +764,7 @@ void ConvImageCompute::Conv2d7x7() { const cl::Image2D* bias_image = nullptr; if (has_bias) { - bias_image = bias_gpu_image_.data(); + bias_image = bias_gpu_image_.data(); } auto& context = ctx_->As(); @@ -845,17 +845,17 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() { auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - auto* input_img = param.x->data(); - auto* filter_img = filter_gpu_image_.data(); + auto* input_img = param.x->data(); + auto* filter_img = filter_gpu_image_.data(); const cl::Image2D* bias_img = nullptr; if (param.bias) { - bias_img = bias_gpu_image_.data(); + bias_img = bias_gpu_image_.data(); } auto image_shape = InitImageDimInfoWith(output_dims); - auto* output_img = param.output->mutable_data( + auto* output_img = param.output->mutable_data( image_shape["width"], image_shape["height"]); STL::stringstream kernel_key; @@ -926,17 +926,17 @@ void ConvImageCompute::DepthwiseConv2d3x3() { auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - auto* input_img = param.x->data(); - auto* filter_img = filter_gpu_image_.data(); + auto* input_img = param.x->data(); + auto* filter_img = filter_gpu_image_.data(); const cl::Image2D* bias_img = nullptr; if (param.bias) { - bias_img = bias_gpu_image_.data(); + bias_img = bias_gpu_image_.data(); } auto image_shape = InitImageDimInfoWith(output_dims); - auto* output_img = param.output->mutable_data( + auto* output_img = param.output->mutable_data( image_shape["width"], image_shape["height"]); STL::stringstream kernel_key; @@ -1009,8 +1009,8 @@ void ConvImageCompute::DepthwiseConv2d() { auto input_dims = param.x->dims(); auto paddings = *param.paddings; auto strides = param.strides; - auto* input_image = param.x->data(); - auto* filter_image = filter_gpu_image_.data(); + auto* input_image = param.x->data(); + auto* filter_image = filter_gpu_image_.data(); auto filter_dims = param.filter->dims(); auto output_dims = param.output->dims(); @@ -1021,7 +1021,7 @@ void ConvImageCompute::DepthwiseConv2d() { int filter_width = filter_dims[3]; int filter_height = filter_dims[2]; auto out_image_shape = InitImageDimInfoWith(output_dims); - auto* out_image = param.output->mutable_data( + auto* out_image = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); const bool has_bias = param.bias != nullptr; @@ -1080,7 +1080,7 @@ void ConvImageCompute::DepthwiseConv2d() { const cl::Buffer* bias_buf = nullptr; const cl::Image2D* bias_image = nullptr; if (has_bias) { - bias_image = bias_gpu_image_.data(); + bias_image = bias_gpu_image_.data(); } auto& context = ctx_->As(); diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h index d474c880e1..f1d470a919 100644 --- a/lite/kernels/opencl/conv_image_compute.h +++ b/lite/kernels/opencl/conv_image_compute.h @@ -18,6 +18,7 @@ #include #include +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/tensor.h" diff --git a/lite/kernels/opencl/conv_image_compute_test.cc b/lite/kernels/opencl/conv_image_compute_test.cc index 9974a510e8..34c0fb71eb 100644 --- a/lite/kernels/opencl/conv_image_compute_test.cc +++ b/lite/kernels/opencl/conv_image_compute_test.cc @@ -284,13 +284,13 @@ TEST(conv2d, compute_image2d_1x1) { paddle::lite::CLImageConverterDefault default_convertor; SHADOW_LOG << "set mapped input ..."; - std::vector x_image_v( + std::vector x_image_v( input_image_width * input_image_height * 4); // 4 : RGBA - std::vector filter_image_v( + std::vector filter_image_v( filter_image_width * filter_image_height * 4); // 4 :RGBA - std::vector bias_image_v( + std::vector bias_image_v( bias_image_width * bias_image_height * 4); // 4 : RGBA - std::vector out_image_v( + std::vector out_image_v( out_image_width * out_image_height * 4); // 4 : RGBA default_convertor.NCHWToImage( @@ -301,13 +301,13 @@ TEST(conv2d, compute_image2d_1x1) { nw_convertor.NCHWToImage( filter_v.data(), filter_image_v.data(), filter_dim); - auto* input_image2d = input.mutable_data( + auto* input_image2d = input.mutable_data( input_image_width, input_image_height, x_image_v.data()); // assign filter as target arm filter.Assign(filter_v.data(), filter_dim); // auto* filter_image2d = - // filter.mutable_data( + // filter.mutable_data( // filter_image_width, // filter_image_height, // filter_image_v.data()); @@ -357,12 +357,11 @@ TEST(conv2d, compute_image2d_1x1) { SHADOW_LOG << "kernel launch ..."; kernel->Launch(); SHADOW_LOG << "mutable output ..."; - auto* output_image2d = - output.mutable_data( - out_image_width, out_image_height); + auto* output_image2d = output.mutable_data( + out_image_width, out_image_height); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { @@ -375,14 +374,13 @@ TEST(conv2d, compute_image2d_1x1) { "cl tensor."; } - TargetWrapperCL::ImgcpySync( - out_image_v.data(), - output.data(), - out_image_width, - out_image_height, - cl_image2d_row_pitch, - cl_image2d_slice_pitch, - IoDirection::DtoH); + TargetWrapperCL::ImgcpySync(out_image_v.data(), + output.data(), + out_image_width, + out_image_height, + cl_image2d_row_pitch, + cl_image2d_slice_pitch, + IoDirection::DtoH); DDim out_image_shape = default_convertor.InitImageDimInfoWith(output.dims()); @@ -641,14 +639,14 @@ TEST(conv2d, compute_image2d_3x3) { paddle::lite::CLImageConverterDefault default_convertor; SHADOW_LOG << "set mapped input ..."; - std::vector x_image_v( - input_image_width * input_image_height * 4); // 4 :RGBA - std::vector filter_image_v( + std::vector x_image_v(input_image_width * + input_image_height * 4); // 4 :RGBA + std::vector filter_image_v( filter_image_width * filter_image_height * 4); // 4 : RGBA - std::vector bias_image_v( + std::vector bias_image_v( bias_image_width * bias_image_height * 4); // 4 : RGBA - std::vector out_image_v( - out_image_width * out_image_height * 4); // 4 :RGBA + std::vector out_image_v(out_image_width * + out_image_height * 4); // 4 :RGBA default_convertor.NCHWToImage( input_v.data(), x_image_v.data(), input_dim); @@ -673,7 +671,7 @@ TEST(conv2d, compute_image2d_3x3) { for (int i = 0; i < filter_image_v.size(); i++) { SHADOW_LOG << "(" << i << ")" << filter_image_v[i]; } - auto* input_image2d = input.mutable_data( + auto* input_image2d = input.mutable_data( input_image_width, input_image_height, x_image_v.data()); // assign filter as target arm filter.Assign(filter_v.data(), @@ -714,11 +712,11 @@ TEST(conv2d, compute_image2d_3x3) { SHADOW_LOG << "kernel launch ..."; kernel->Launch(); SHADOW_LOG << "mutable output ..."; - auto* output_image2d = output.mutable_data( + auto* output_image2d = output.mutable_data( out_image_width, out_image_height); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { @@ -732,7 +730,7 @@ TEST(conv2d, compute_image2d_3x3) { } TargetWrapperCL::ImgcpySync(out_image_v.data(), - output.data(), + output.data(), out_image_width, out_image_height, cl_image2d_row_pitch, @@ -987,14 +985,14 @@ TEST(conv2d, compute_image2d_5x5) { paddle::lite::CLImageConverterDefault default_convertor; SHADOW_LOG << "set mapped input ..."; - std::vector x_image_v( - input_image_width * input_image_height * 4); // 4 :RGBA - std::vector filter_image_v( + std::vector x_image_v(input_image_width * + input_image_height * 4); // 4 :RGBA + std::vector filter_image_v( filter_image_width * filter_image_height * 4); // 4 : RGBA - std::vector bias_image_v( + std::vector bias_image_v( bias_image_width * bias_image_height * 4); // 4 : RGBA - std::vector out_image_v( - out_image_width * out_image_height * 4); // 4 :RGBA + std::vector out_image_v(out_image_width * + out_image_height * 4); // 4 :RGBA default_convertor.NCHWToImage( input_v.data(), x_image_v.data(), input_dim); @@ -1019,7 +1017,7 @@ TEST(conv2d, compute_image2d_5x5) { for (int i = 0; i < filter_image_v.size(); i++) { SHADOW_LOG << "(" << i << ")" << filter_image_v[i]; } - auto* input_image2d = input.mutable_data( + auto* input_image2d = input.mutable_data( input_image_width, input_image_height, x_image_v.data()); // assign filter as target arm filter.Assign(filter_v.data(), @@ -1060,11 +1058,11 @@ TEST(conv2d, compute_image2d_5x5) { SHADOW_LOG << "kernel launch ..."; kernel->Launch(); SHADOW_LOG << "mutable output ..."; - auto* output_image2d = output.mutable_data( + auto* output_image2d = output.mutable_data( out_image_width, out_image_height); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { @@ -1078,7 +1076,7 @@ TEST(conv2d, compute_image2d_5x5) { } TargetWrapperCL::ImgcpySync(out_image_v.data(), - output.data(), + output.data(), out_image_width, out_image_height, cl_image2d_row_pitch, @@ -1325,13 +1323,13 @@ TEST(conv2d, compute_image2d_7x7) { paddle::lite::CLImageConverterDefault default_convertor; SHADOW_LOG << "set mapped input ..."; - std::vector x_image_v( + std::vector x_image_v( input_image_width * input_image_height * 4); // 4 : RGBA - std::vector filter_image_v( + std::vector filter_image_v( filter_image_width * filter_image_height * 4); // 4 : RGBA - std::vector bias_image_v( + std::vector bias_image_v( bias_image_width * bias_image_height * 4); // 4 : RGBA - std::vector out_image_v( + std::vector out_image_v( out_image_width * out_image_height * 4); // 4 : RGBA default_convertor.NCHWToImage( @@ -1357,7 +1355,7 @@ TEST(conv2d, compute_image2d_7x7) { for (int i = 0; i < filter_image_v.size(); i++) { SHADOW_LOG << "(" << i << ")" << filter_image_v[i]; } - auto* input_image2d = input.mutable_data( + auto* input_image2d = input.mutable_data( input_image_width, input_image_height, x_image_v.data()); // assign filter as target arm @@ -1399,11 +1397,11 @@ TEST(conv2d, compute_image2d_7x7) { SHADOW_LOG << "kernel launch ..."; kernel->Launch(); SHADOW_LOG << "mutable output ..."; - auto* output_image2d = output.mutable_data( + auto* output_image2d = output.mutable_data( out_image_width, out_image_height); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { @@ -1417,7 +1415,7 @@ TEST(conv2d, compute_image2d_7x7) { } TargetWrapperCL::ImgcpySync(out_image_v.data(), - output.data(), + output.data(), out_image_width, out_image_height, cl_image2d_row_pitch, diff --git a/lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc b/lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc index 6cf8c13d88..3cdce0852c 100644 --- a/lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc +++ b/lite/kernels/opencl/depthwise_conv2d_image_compute_test.cc @@ -253,14 +253,14 @@ TEST(depthwise_conv2d_basic, compute) { paddle::lite::CLImageConverterDefault default_convertor; VLOG(4) << "set mapped input ..."; - std::vector x_image_v(input_image_width * - input_image_height * 4); // 4 : RGBA - std::vector filter_image_v( + std::vector x_image_v(input_image_width * input_image_height * + 4); // 4 : RGBA + std::vector filter_image_v( filter_image_width * filter_image_height * 4); // 4 : RGBA - std::vector bias_image_v( - bias_image_width * bias_image_height * 4); // 4 : RGBA - std::vector out_image_v(out_image_width * out_image_height * - 4); // 4 : RGBA + std::vector bias_image_v(bias_image_width * + bias_image_height * 4); // 4 : RGBA + std::vector out_image_v(out_image_width * out_image_height * + 4); // 4 : RGBA default_convertor.NCHWToImage( input_v.data(), x_image_v.data(), input_dim); @@ -270,9 +270,9 @@ TEST(depthwise_conv2d_basic, compute) { nw_convertor.NCHWToImage( filter_v.data(), filter_image_v.data(), filter_dim); - auto* input_image2d = input.mutable_data( + auto* input_image2d = input.mutable_data( input_image_width, input_image_height, x_image_v.data()); - auto* filter_image2d = filter.mutable_data( + auto* filter_image2d = filter.mutable_data( filter_image_width, filter_image_height, filter_image_v.data()); if (bias_flag) { @@ -285,7 +285,7 @@ TEST(depthwise_conv2d_basic, compute) { CLImageConverterFolder folder_convertor; folder_convertor.NCHWToImage( bias_v.data(), bias_image_v.data(), bias_dim); - auto* bias_data = bias.mutable_data( + auto* bias_data = bias.mutable_data( bias_image_width, bias_image_height, bias_image_v.data()); } @@ -301,11 +301,11 @@ TEST(depthwise_conv2d_basic, compute) { VLOG(4) << "kernel launch ..."; kernel->Launch(); VLOG(4) << "mutable output ..."; - auto* output_image2d = output.mutable_data( + auto* output_image2d = output.mutable_data( out_image_width, out_image_height); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { @@ -319,7 +319,7 @@ TEST(depthwise_conv2d_basic, compute) { } TargetWrapperCL::ImgcpySync(out_image_v.data(), - output.data(), + output.data(), out_image_width, out_image_height, cl_image2d_row_pitch, @@ -434,11 +434,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) { default_converter->InitImageDimInfoWith(input.dims()); LOG(INFO) << "input_image_shape = " << input_image_shape[0] << " " << input_image_shape[1]; - std::vector input_image_data(input_image_shape.production() * - 4); // 4 : RGBA + std::vector input_image_data(input_image_shape.production() * + 4); // 4 : RGBA default_converter->NCHWToImage( input_v.data(), input_image_data.data(), input.dims()); - auto* input_image = input.mutable_data( + auto* input_image = input.mutable_data( input_image_shape[0], input_image_shape[1], input_image_data.data()); LOG(INFO) << "prepare kernel"; @@ -447,11 +447,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) { DDim filter_image_shape = nw_converter->InitImageDimInfoWith(filter.dims()); LOG(INFO) << "filter_image_shape = " << filter_image_shape[0] << " " << filter_image_shape[1]; - std::vector filter_image_data(filter_image_shape.production() * - 4); // 4 : RGBA + std::vector filter_image_data(filter_image_shape.production() * + 4); // 4 : RGBA nw_converter->NCHWToImage( filter_v.data(), filter_image_data.data(), filter.dims()); - auto* filter_image = filter.mutable_data( + auto* filter_image = filter.mutable_data( filter_image_shape[0], filter_image_shape[1], filter_image_data.data()); LOG(INFO) << "launch"; @@ -460,13 +460,13 @@ TEST(depthwise_conv2d_image2d_fp16, compute) { default_converter->InitImageDimInfoWith(output.dims()); LOG(INFO) << "output_image_shape = " << output_image_shape[0] << " " << output_image_shape[1]; - auto* output_image = output.mutable_data( + auto* output_image = output.mutable_data( output_image_shape[0], output_image_shape[1]); kernel->Launch(); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { VLOG(4) << "--- Find the sync event for the target cl tensor. ---"; @@ -491,8 +491,7 @@ TEST(depthwise_conv2d_image2d_fp16, compute) { const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_slice_pitch{0}; - uint16_t* output_image_data = - new uint16_t[output_image_shape.production() * 4]; + half_t* output_image_data = new half_t[output_image_shape.production() * 4]; TargetWrapperCL::ImgcpySync(output_image_data, output_image, output_image_shape[0], diff --git a/lite/kernels/opencl/elementwise_add_image_compute.cc b/lite/kernels/opencl/elementwise_add_image_compute.cc index f9fadb2cd3..d4285c88e5 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.cc +++ b/lite/kernels/opencl/elementwise_add_image_compute.cc @@ -78,10 +78,10 @@ void ElementwiseAddImageCompute::Run() { default_convertor.InitImageDimInfoWith(out->dims()); // w, h auto y_img_shape = default_convertor.InitImageDimInfoWith(y->dims()); - auto* x_img = x->data(); - auto* y_img = y->data(); - auto* out_img = out->mutable_data(out_img_shape[0], - out_img_shape[1]); + auto* x_img = x->data(); + auto* y_img = y->data(); + auto* out_img = out->mutable_data(out_img_shape[0], + out_img_shape[1]); VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height; VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1]; diff --git a/lite/kernels/opencl/elementwise_add_image_compute.h b/lite/kernels/opencl/elementwise_add_image_compute.h index 9bb5c4dfe3..084f0fe7fb 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.h +++ b/lite/kernels/opencl/elementwise_add_image_compute.h @@ -15,6 +15,7 @@ #include #include +#include "lite/backends/opencl/cl_half.h" #include "lite/core/kernel.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" diff --git a/lite/kernels/opencl/elementwise_add_image_compute_test.cc b/lite/kernels/opencl/elementwise_add_image_compute_test.cc index 857854c3c3..ecc9e789c4 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute_test.cc +++ b/lite/kernels/opencl/elementwise_add_image_compute_test.cc @@ -158,9 +158,9 @@ TEST(elementwise_add_image, compute) { auto x_img_shape = default_convertor.InitImageDimInfoWith(x_dim); // w, h auto x_img_w = x_img_shape[0]; auto x_img_h = x_img_shape[1]; - std::vector x_img_v(x_img_w * x_img_h * 4); // 4: RGBA + std::vector x_img_v(x_img_w * x_img_h * 4); // 4: RGBA default_convertor.NCHWToImage(x_v.data(), x_img_v.data(), x_dim); - eleadd_x.mutable_data( + eleadd_x.mutable_data( x_img_w, x_img_h, x_img_v.data()); // y @@ -169,10 +169,10 @@ TEST(elementwise_add_image, compute) { auto y_img_shape = default_convertor.InitImageDimInfoWith(y_dim); // w, h auto y_img_w = y_img_shape[0]; auto y_img_h = y_img_shape[1]; - std::vector y_img_v(y_img_shape[0] * y_img_shape[1] * - 4); // 4: RGBA + std::vector y_img_v(y_img_shape[0] * y_img_shape[1] * + 4); // 4: RGBA default_convertor.NCHWToImage(y_v.data(), y_img_v.data(), y_dim); - eleadd_y.mutable_data( + eleadd_y.mutable_data( y_img_w, y_img_h, y_img_v.data()); // out @@ -180,10 +180,10 @@ TEST(elementwise_add_image, compute) { default_convertor.InitImageDimInfoWith(out_dim); // w, h auto out_img_w = out_img_shape[0]; auto out_img_h = out_img_shape[1]; - eleadd_out.mutable_data(out_img_w, out_img_h); + eleadd_out.mutable_data(out_img_w, out_img_h); - std::vector out_img_v(out_img_w * out_img_h * 4); - fill_data( + std::vector out_img_v(out_img_w * out_img_h * 4); + fill_data( out_img_v.data(), out_img_v.size(), 0); // fill with zero value std::vector out_v(out_dim.production()); @@ -235,7 +235,7 @@ TEST(elementwise_add_image, compute) { const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_slice_pitch{0}; TargetWrapperCL::ImgcpySync(out_img_v.data(), - eleadd_out.data(), + eleadd_out.data(), out_img_w, out_img_h, cl_image2d_row_pitch, diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc index c5bbf0f4a5..316dc52094 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute.cc +++ b/lite/kernels/opencl/elementwise_mul_image_compute.cc @@ -14,6 +14,7 @@ #include #include +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_image_converter.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" @@ -95,10 +96,10 @@ class ElementwiseMulImageCompute default_convertor.InitImageDimInfoWith(out->dims()); // w, h auto y_img_shape = default_convertor.InitImageDimInfoWith(y->dims()); - auto* x_img = x->data(); - auto* y_img = y->data(); - auto* out_img = out->mutable_data(out_img_shape[0], - out_img_shape[1]); + auto* x_img = x->data(); + auto* y_img = y->data(); + auto* out_img = out->mutable_data(out_img_shape[0], + out_img_shape[1]); VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height; VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1]; diff --git a/lite/kernels/opencl/elementwise_mul_image_compute_test.cc b/lite/kernels/opencl/elementwise_mul_image_compute_test.cc index f177d153d0..06d0d4f7b4 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute_test.cc +++ b/lite/kernels/opencl/elementwise_mul_image_compute_test.cc @@ -151,9 +151,9 @@ TEST(elementwise_mul_image, compute) { auto x_img_shape = default_convertor.InitImageDimInfoWith(x_dim); // w, h auto x_img_w = x_img_shape[0]; auto x_img_h = x_img_shape[1]; - std::vector x_img_v(x_img_w * x_img_h * 4); // 4: RGBA + std::vector x_img_v(x_img_w * x_img_h * 4); // 4: RGBA default_convertor.NCHWToImage(x_v.data(), x_img_v.data(), x_dim); - elemul_x.mutable_data( + elemul_x.mutable_data( x_img_w, x_img_h, x_img_v.data()); // y @@ -162,10 +162,10 @@ TEST(elementwise_mul_image, compute) { auto y_img_shape = default_convertor.InitImageDimInfoWith(y_dim); // w, h auto y_img_w = y_img_shape[0]; auto y_img_h = y_img_shape[1]; - std::vector y_img_v(y_img_shape[0] * y_img_shape[1] * - 4); // 4: RGBA + std::vector y_img_v(y_img_shape[0] * y_img_shape[1] * + 4); // 4: RGBA default_convertor.NCHWToImage(y_v.data(), y_img_v.data(), y_dim); - elemul_y.mutable_data( + elemul_y.mutable_data( y_img_w, y_img_h, y_img_v.data()); // out @@ -173,10 +173,10 @@ TEST(elementwise_mul_image, compute) { default_convertor.InitImageDimInfoWith(out_dim); // w, h auto out_img_w = out_img_shape[0]; auto out_img_h = out_img_shape[1]; - elemul_out.mutable_data(out_img_w, out_img_h); + elemul_out.mutable_data(out_img_w, out_img_h); - std::vector out_img_v(out_img_w * out_img_h * 4); - fill_data( + std::vector out_img_v(out_img_w * out_img_h * 4); + fill_data( out_img_v.data(), out_img_v.size(), 0); // fill with zero value std::vector out_v(out_dim.production()); @@ -218,7 +218,7 @@ TEST(elementwise_mul_image, compute) { const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_slice_pitch{0}; TargetWrapperCL::ImgcpySync(out_img_v.data(), - elemul_out.data(), + elemul_out.data(), out_img_w, out_img_h, cl_image2d_row_pitch, diff --git a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc index e84e9ab7f7..3f96f4224f 100644 --- a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc +++ b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/elementwise_add_image_compute.h" diff --git a/lite/kernels/opencl/layout_compute.cc b/lite/kernels/opencl/layout_compute.cc index b5610d3975..046f667dbe 100644 --- a/lite/kernels/opencl/layout_compute.cc +++ b/lite/kernels/opencl/layout_compute.cc @@ -15,6 +15,7 @@ #include #include #include "lite/api/paddle_place.h" +#include "lite/backends/opencl/cl_half.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" #include "lite/core/target_wrapper.h" @@ -47,7 +48,7 @@ class LayoutComputeBufferChwToImageDefault auto* x_data = param.x->data(); auto x_dims = param.x->dims(); auto image_shape = InitImageDimInfoWith(x_dims); - auto* y_data = param.y->mutable_data( + auto* y_data = param.y->mutable_data( image_shape["width"], image_shape["height"]); auto y_dims = param.y->dims(); @@ -146,7 +147,7 @@ class LayoutComputeImageDefaultToBufferChw void Run() override { auto& param = Param(); - auto* x_data = param.x->data(); + auto* x_data = param.x->data(); auto x_dims = param.x->dims(); auto* y_data = param.y->mutable_data(TARGET(kOpenCL)); auto y_dims = param.y->dims(); diff --git a/lite/kernels/opencl/layout_compute_test.cc b/lite/kernels/opencl/layout_compute_test.cc index 7fece3812f..93a5be5195 100644 --- a/lite/kernels/opencl/layout_compute_test.cc +++ b/lite/kernels/opencl/layout_compute_test.cc @@ -79,7 +79,7 @@ TEST(layout_ImageDefault, compute) { auto* y_data = y.mutable_data(TARGET(kOpenCL)); auto image_shape = paddle::lite::kernels::opencl::InitImageDimInfoWith(x_dim); - auto* y_image_data = y_image.mutable_data( + auto* y_image_data = y_image.mutable_data( image_shape["width"], image_shape["height"]); auto* mapped_x = static_cast(TargetWrapperCL::Map( x_data, 0, sizeof(float) * x_dim.production())); diff --git a/lite/kernels/opencl/nearest_interp_image_compute.cc b/lite/kernels/opencl/nearest_interp_image_compute.cc index 113f95eb7b..ab7de85ae7 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -45,14 +46,13 @@ class NearestInterpComputeImageDefault auto& param = *param_.get_mutable(); const auto& x_dims = param.X->dims(); auto* x_buf = - param.X->data(); // use uint16_t represents half float + param.X->data(); // use half_t represents half float auto image_shape = InitImageDimInfoWith(x_dims); - auto* out_buf = - param.Out->mutable_data( // use uint16_t - // represents half float - image_shape["width"], - image_shape["height"]); + auto* out_buf = param.Out->mutable_data( // use half_t + // represents half float + image_shape["width"], + image_shape["height"]); const auto& y_dims = param.Out->dims(); // useless: check dim only float scale_h = y_dims[2] / x_dims[2]; float scale_w = y_dims[3] / x_dims[3]; diff --git a/lite/kernels/opencl/nearest_interp_image_compute_test.cc b/lite/kernels/opencl/nearest_interp_image_compute_test.cc index 32afd8a857..37389d7a3d 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute_test.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute_test.cc @@ -166,11 +166,11 @@ TEST(nearest_interp_image2d, compute) { mapped_y[i] = static_cast(0); } auto *nearest_interp_in_data = - nearest_interp_in.mutable_data( + nearest_interp_in.mutable_data( nearest_interp_image2d_shape["width"], nearest_interp_image2d_shape["height"]); auto *nearest_interp_out_data = - nearest_interp_out.mutable_data( + nearest_interp_out.mutable_data( y_dim[3], y_dim[2]); // set context and kernel args diff --git a/lite/kernels/opencl/pool_image_compute.cc b/lite/kernels/opencl/pool_image_compute.cc index 81d8f20868..adfa57f15b 100644 --- a/lite/kernels/opencl/pool_image_compute.cc +++ b/lite/kernels/opencl/pool_image_compute.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -67,13 +68,13 @@ class PoolComputeImage2D : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - auto* x_img = param.x->data(); + auto* x_img = param.x->data(); LOG(INFO) << "x_image" << x_img; auto out_image_shape = InitImageDimInfoWith(out_dims); LOG(INFO) << "out_image_shape = " << out_image_shape["width"] << " " << out_image_shape["height"]; - auto* out_img = param.output->mutable_data( + auto* out_img = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); LOG(INFO) << "out_image" << out_img; diff --git a/lite/kernels/opencl/pool_image_compute_test.cc b/lite/kernels/opencl/pool_image_compute_test.cc index 57bc4ea854..52aa93d9fd 100644 --- a/lite/kernels/opencl/pool_image_compute_test.cc +++ b/lite/kernels/opencl/pool_image_compute_test.cc @@ -123,23 +123,22 @@ TEST(pool2d_image2d, compute) { DDim x_image_shape = default_converter->InitImageDimInfoWith(in_dim); LOG(INFO) << "x_image_shape = " << x_image_shape[0] << " " << x_image_shape[1]; - std::vector x_image_data(x_image_shape.production() * - 4); // 4 : RGBA + std::vector x_image_data(x_image_shape.production() * 4); // 4 : RGBA default_converter->NCHWToImage(input_v.data(), x_image_data.data(), in_dim); - auto* x_image = x.mutable_data( + auto* x_image = x.mutable_data( x_image_shape[0], x_image_shape[1], x_image_data.data()); LOG(INFO) << "x_image:" << x_image; DDim out_image_shape = default_converter->InitImageDimInfoWith(out_dim); LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " " << out_image_shape[1]; - auto* out_image = out.mutable_data(out_image_shape[0], - out_image_shape[1]); + auto* out_image = out.mutable_data(out_image_shape[0], + out_image_shape[1]); LOG(INFO) << "out_image:" << out_image; kernel->Launch(); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { VLOG(4) << "--- Find the sync event for the target cl tensor. ---"; @@ -154,7 +153,7 @@ TEST(pool2d_image2d, compute) { const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_slice_pitch{0}; - uint16_t* out_image_data = new uint16_t[out_image_shape.production() * 4]; + half_t* out_image_data = new half_t[out_image_shape.production() * 4]; TargetWrapperCL::ImgcpySync(out_image_data, out_image, out_image_shape[0], diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc index 63af935f98..4b50cfd050 100644 --- a/lite/kernels/opencl/reshape_image_compute.cc +++ b/lite/kernels/opencl/reshape_image_compute.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -51,7 +52,7 @@ class ReshapeComputeFloatImage : public KernelLitedata(); + const cl::Image2D* const x_image = x->data(); const std::vector& shape_vct = param.shape_vct; Tensor* const output = param.output; @@ -60,7 +61,7 @@ class ReshapeComputeFloatImage : public KernelLite& out_image_shape = InitImageDimInfoWith(out_dims); - cl::Image2D* const out_image = output->mutable_data( + cl::Image2D* const out_image = output->mutable_data( out_image_shape.at("width"), out_image_shape.at("height")); LOG(INFO) << "out_dims= " << out_dims; diff --git a/lite/kernels/opencl/reshape_image_compute_test.cc b/lite/kernels/opencl/reshape_image_compute_test.cc index eed7d279fa..950e097855 100644 --- a/lite/kernels/opencl/reshape_image_compute_test.cc +++ b/lite/kernels/opencl/reshape_image_compute_test.cc @@ -152,13 +152,13 @@ TEST(reshape_opencl, compute) { } paddle::lite::CLImageConverterDefault default_convertor; - std::vector x_image_data(input_image_width * input_image_height * - 4); // 4 : RGBA + std::vector x_image_data(input_image_width * input_image_height * + 4); // 4 : RGBA LOG(INFO) << "set mapped input ..."; default_convertor.NCHWToImage(input_v_data, x_image_data.data(), input_dim); - auto* input_image = input.mutable_data( + auto* input_image = input.mutable_data( input_image_width, input_image_height, x_image_data.data()); LOG(INFO) << "prepare kernel ready"; @@ -168,7 +168,7 @@ TEST(reshape_opencl, compute) { DDim out_image_shape = default_converter.InitImageDimInfoWith(output_dim); LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " " << out_image_shape[1]; - auto* out_image = output.mutable_data( + auto* out_image = output.mutable_data( out_image_shape[0], out_image_shape[1]); VLOG(4) << "out_dims= " << output_dim; @@ -185,7 +185,7 @@ TEST(reshape_opencl, compute) { kernel->Launch(); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_image); if (it != wait_list->end()) { @@ -196,9 +196,9 @@ TEST(reshape_opencl, compute) { LOG(FATAL) << "Could not find the sync event for the target cl tensor."; } - uint16_t* out_image_data = new uint16_t[out_image_shape.production() * 4]; + half_t* out_image_data = new half_t[out_image_shape.production() * 4]; TargetWrapperCL::ImgcpySync(out_image_data, - output.data(), + output.data(), out_image_shape[0], out_image_shape[1], cl_image2d_row_pitch, diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index 0277b064c8..0387314f4f 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" @@ -43,7 +44,7 @@ class ScaleComputeImage2D : public KernelLite(); const auto& in_dims = param.x->dims(); - auto* x_img = param.x->data(); + auto* x_img = param.x->data(); const float scale = param.scale; const float bias = param.bias; @@ -51,7 +52,7 @@ class ScaleComputeImage2D : public KernelLitemutable_data( + auto* out_img = param.output->mutable_data( out_image_shape["width"], out_image_shape["height"]); LOG(INFO) << "out_image" << out_img; diff --git a/lite/kernels/opencl/scale_image_compute_test.cc b/lite/kernels/opencl/scale_image_compute_test.cc index 7a06d1cad8..c9461ffbb8 100644 --- a/lite/kernels/opencl/scale_image_compute_test.cc +++ b/lite/kernels/opencl/scale_image_compute_test.cc @@ -77,19 +77,19 @@ TEST(scale_image2d_fp32, compute) { CLImageConverterDefault* default_converter = new CLImageConverterDefault(); DDim image_shape = default_converter->InitImageDimInfoWith(in_dim); LOG(INFO) << "image_shape = " << image_shape[0] << " " << image_shape[1]; - std::vector x_image_data(image_shape.production() * 4); // 4 : RGBA + std::vector x_image_data(image_shape.production() * 4); // 4 : RGBA default_converter->NCHWToImage(input_v.data(), x_image_data.data(), in_dim); - auto* x_image = x.mutable_data( + auto* x_image = x.mutable_data( image_shape[0], image_shape[1], x_image_data.data()); LOG(INFO) << "x_image:" << x_image; auto* out_image = - out.mutable_data(image_shape[0], image_shape[1]); + out.mutable_data(image_shape[0], image_shape[1]); LOG(INFO) << "out_image:" << out_image; kernel->Launch(); auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.output->data(); + auto* out_ptr = param.output->data(); auto it = wait_list->find(out_ptr); if (it != wait_list->end()) { VLOG(4) << "--- Find the sync event for the target cl tensor. ---"; @@ -104,7 +104,7 @@ TEST(scale_image2d_fp32, compute) { const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_slice_pitch{0}; - uint16_t* out_image_data = new uint16_t[image_shape.production() * 4]; + half_t* out_image_data = new half_t[image_shape.production() * 4]; TargetWrapperCL::ImgcpySync(out_image_data, out_image, image_shape[0], -- GitLab