未验证 提交 857b7116 编写于 作者: X xiebaiyuan 提交者: GitHub

[LITE][OPENCL][Image] fix issue in concat and nearest_interp thx for… (#3011)

* [LITE][OPENCL][Image] fix issue in concat and nearest_interp  thx for chenj and ys,test=develop

* [LITE][OPENCL][Image] fix issue in concat and nearest_interp  thx for chenj and ys,test=develop
上级 a797ed0a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -52,7 +49,7 @@ __kernel void concat2(__read_only image2d_t input0, ...@@ -52,7 +49,7 @@ __kernel void concat2(__read_only image2d_t input0,
input_data = READ_IMG_TYPE(CL_DTYPE_CHAR, input1, sampler, input_pos); input_data = READ_IMG_TYPE(CL_DTYPE_CHAR, input1, sampler, input_pos);
} }
int value_offset = c_in % 4; int value_offset = c_in % 4;
float value; CL_DTYPE value;
if (value_offset == 0) { if (value_offset == 0) {
value = input_data.x; value = input_data.x;
} else if (value_offset == 1) { } else if (value_offset == 1) {
...@@ -132,7 +129,7 @@ __kernel void concat_mul(__read_only image2d_t input, ...@@ -132,7 +129,7 @@ __kernel void concat_mul(__read_only image2d_t input,
int2 output_pos; int2 output_pos;
output_pos.x = (c_out / 4) * in_W + in_w; output_pos.x = (c_out / 4) * in_W + in_w;
output_pos.y = in_nh; output_pos.y = in_nh;
float val; CL_DTYPE val;
if (i == 0) { if (i == 0) {
val = input_data.x; val = input_data.x;
} else if (i == 1) { } else if (i == 1) {
......
...@@ -96,7 +96,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -96,7 +96,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.output->dims(); const auto& x_dims = param.output->dims();
auto image_shape = InitImageDimInfoWith(x_dims); auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf = param.output->mutable_data<float, cl::Image2D>( auto* out_buf = param.output->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]); image_shape["width"], image_shape["height"]);
const auto& y_dims = param.output->dims(); // useless: check dim only const auto& y_dims = param.output->dims(); // useless: check dim only
...@@ -107,21 +107,41 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -107,21 +107,41 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto inputs = param.x; auto inputs = param.x;
int arg_idx = 0; int arg_idx = 0;
int width = inputs[0]->dims()[-1]; int width = inputs[0]->dims()[inputs[0]->dims().size() - 1];
auto global_work_size = cl::NDRange{
static_cast<cl::size_type>(x_dims[-1]), LOG(INFO) << "concat 输入尺寸: ";
static_cast<cl::size_type>(image_shape["width"] / x_dims[-1]), for (size_t i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputs [" << i << "]"
<< "[" << inputs[i]->dims().size() << "D]:"
<< " dims:" << inputs[i]->dims()[0] << " "
<< inputs[i]->dims()[1] << " " << inputs[i]->dims()[2] << " "
<< inputs[i]->dims()[3];
}
LOG(INFO) << "concat 输出尺寸: ";
LOG(INFO) << " out dims: "
<< "[" << x_dims.size() << "D]:" << x_dims[0] << " " << x_dims[1]
<< " " << x_dims[2] << " " << x_dims[3];
LOG(INFO) << "axis_: " << axis_;
LOG(INFO) << "flag_: " << flag_;
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(x_dims[x_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["width"] /
x_dims[x_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["height"])}; static_cast<cl::size_type>(image_shape["height"])};
VLOG(4) << TargetToStr(param.output->target()); VLOG(4) << TargetToStr(param.output->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"]; << image_shape["height"];
VLOG(4) << "x_dims[" << x_dims.size() << "D]:" << x_dims[0] << " " VLOG(4) << "x_dims[" << x_dims.size() << "D]:" << x_dims[0] << " "
<< x_dims[1] << " " << x_dims[2] << " " << x_dims[3]; << x_dims[1] << " " << x_dims[2] << " " << x_dims[3]
<< "x_dims[x_dims.size() - 1]" << x_dims[x_dims.size() - 1];
VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " " VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " "
<< y_dims[1] << " " << y_dims[2] << " " << y_dims[3]; << y_dims[1] << " " << y_dims[2] << " " << y_dims[3];
VLOG(4) << "width_: " << width_ << ", flag_: " << flag_; LOG(INFO) << "width_: " << width_ << ", flag_: " << flag_;
VLOG(4) << "global_work_size: " << x_dims[x_dims.size() - 1] << " "
<< (image_shape["width"] / x_dims[x_dims.size() - 1]) << " "
<< (image_shape["height"]);
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int out_w = x_dims[-1]; int out_w = x_dims[x_dims.size() - 1];
int out_c = x_dims[1]; int out_c = x_dims[1];
if (inputs.size() == 2) { if (inputs.size() == 2) {
auto* x_buf0 = inputs[0]->data<float, cl::Image2D>(); auto* x_buf0 = inputs[0]->data<float, cl::Image2D>();
...@@ -159,12 +179,13 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -159,12 +179,13 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto in_dims = inputs[i]->dims(); auto in_dims = inputs[i]->dims();
image_shape = InitImageDimInfoWith(in_dims); image_shape = InitImageDimInfoWith(in_dims);
auto* x_buf = inputs[i]->data<float, cl::Image2D>(); auto* x_buf = inputs[i]->data<float, cl::Image2D>();
auto in_w = in_dims[-1]; int in_w = in_dims[in_dims.size() - 1];
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"]; << image_shape["height"];
global_work_size = cl::NDRange{ global_work_size =
static_cast<cl::size_type>(in_dims[-1]), cl::NDRange{static_cast<cl::size_type>(in_dims[in_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["width"] / in_dims[-1]), static_cast<cl::size_type>(image_shape["width"] /
in_dims[in_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["height"])}; static_cast<cl::size_type>(image_shape["height"])};
cl_int status = kernel.setArg(arg_idx, *x_buf); cl_int status = kernel.setArg(arg_idx, *x_buf);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
...@@ -205,7 +226,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -205,7 +226,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
int width_ = 1; int width_ = 1;
param_t* concat_param_{nullptr}; param_t* concat_param_{nullptr};
std::string kernel_func_name_{}; std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{" -DCL_DTYPE_half"};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -45,15 +45,16 @@ class NearestInterpComputeImageDefault ...@@ -45,15 +45,16 @@ class NearestInterpComputeImageDefault
void Run() override { void Run() override {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.X->dims(); const auto& x_dims = param.X->dims();
const auto& y_dims = param.Out->dims();
auto* x_buf = auto* x_buf =
param.X->data<half_t, param.X->data<half_t,
cl::Image2D>(); // use half_t represents half float cl::Image2D>(); // use half_t represents half float
auto image_shape = InitImageDimInfoWith(x_dims); auto out_image_shape = InitImageDimInfoWith(y_dims);
auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>( // use half_t auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>( // use half_t
// represents half float // represents half float
image_shape["width"], out_image_shape["width"],
image_shape["height"]); out_image_shape["height"]);
const auto& y_dims = param.Out->dims(); // useless: check dim only
float scale_h = y_dims[2] / x_dims[2]; float scale_h = y_dims[2] / x_dims[2];
float scale_w = y_dims[3] / x_dims[3]; float scale_w = y_dims[3] / x_dims[3];
int in_dims_h = x_dims[2]; int in_dims_h = x_dims[2];
...@@ -87,16 +88,22 @@ class NearestInterpComputeImageDefault ...@@ -87,16 +88,22 @@ class NearestInterpComputeImageDefault
VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target()); VLOG(4) << TargetToStr(param.Out->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "out_image_shape(w,h):" << out_image_shape["width"] << " "
<< image_shape["height"]; << out_image_shape["height"];
VLOG(4) << "x_dims[" << x_dims.size() << "D]:" << x_dims[0] << " " VLOG(4) << "x_dims[" << x_dims.size() << "D]:" << x_dims[0] << " "
<< x_dims[1] << " " << x_dims[2] << " " << x_dims[3]; << x_dims[1] << " " << x_dims[2] << " " << x_dims[3];
VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " " VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " "
<< y_dims[1] << " " << y_dims[2] << " " << y_dims[3]; << y_dims[1] << " " << y_dims[2] << " " << y_dims[3];
const std::vector<size_t>& default_work_size =
DefaultWorkSize(y_dims,
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])}));
auto global_work_size = auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(image_shape["width"]), cl::NDRange{static_cast<cl::size_type>(default_work_size.data()[0]),
static_cast<cl::size_type>(image_shape["height"])}; static_cast<cl::size_type>(default_work_size.data()[1]),
static_cast<cl::size_type>(default_work_size.data()[2])};
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
cl::NullRange, cl::NullRange,
...@@ -112,7 +119,7 @@ class NearestInterpComputeImageDefault ...@@ -112,7 +119,7 @@ class NearestInterpComputeImageDefault
private: private:
std::string kernel_func_name_{"nearest_interp"}; std::string kernel_func_name_{"nearest_interp"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{" -DCL_DTYPE_half"};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册