提交 730ca937 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] Comment vlog for opencl (#3252)


* [LITE][OPENCL] comment vlog for opencl kernel. test=develop
上级 1c5b0583
...@@ -101,6 +101,7 @@ class ActivationComputeImageDefault ...@@ -101,6 +101,7 @@ class ActivationComputeImageDefault
status = kernel.setArg(++arg_idx, scale_); status = kernel.setArg(++arg_idx, scale_);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target()); VLOG(4) << TargetToStr(param.Out->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
...@@ -112,6 +113,7 @@ class ActivationComputeImageDefault ...@@ -112,6 +113,7 @@ class ActivationComputeImageDefault
VLOG(4) << "threshold:" << threshold_; VLOG(4) << "threshold:" << threshold_;
VLOG(4) << "scale:" << scale_; VLOG(4) << "scale:" << scale_;
VLOG(4) << "kernel func name:" << kernel_func_name_; VLOG(4) << "kernel func name:" << kernel_func_name_;
#endif
auto global_work_size = auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(image_shape["width"]), cl::NDRange{static_cast<cl::size_type>(image_shape["width"]),
......
...@@ -77,17 +77,21 @@ class BilinearInterpImageCompute ...@@ -77,17 +77,21 @@ class BilinearInterpImageCompute
int out_h = out_dims[2]; int out_h = out_dims[2];
int out_w = out_dims[3]; int out_w = out_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims; VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims; VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims); auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>(); auto* x_img = x->data<half_t, cl::Image2D>();
// VLOG(4) << "x_image: " << x_img;
auto* out_img = out->mutable_data<half_t, cl::Image2D>( auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "x_image: " << x_img;
// VLOG(4) << "out_image: " << out_img; // VLOG(4) << "out_image: " << out_img;
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " " VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
...@@ -96,6 +100,7 @@ class BilinearInterpImageCompute ...@@ -96,6 +100,7 @@ class BilinearInterpImageCompute
<< ", align_delta: " << align_delta; << ", align_delta: " << align_delta;
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w; VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w; VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w;
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -107,8 +112,10 @@ class BilinearInterpImageCompute ...@@ -107,8 +112,10 @@ class BilinearInterpImageCompute
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]), static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])})); static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", " VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2]; << default_work_size[1] << ", " << default_work_size[2];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img); cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_img); status = kernel.setArg(arg_idx++, *out_img);
...@@ -142,9 +149,10 @@ class BilinearInterpImageCompute ...@@ -142,9 +149,10 @@ class BilinearInterpImageCompute
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_); context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2]; << global_work_size[1] << " " << global_work_size[2];
#endif
} }
protected: protected:
......
...@@ -123,7 +123,8 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -123,7 +123,8 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
int arg_idx = 0; int arg_idx = 0;
int width = inputs[0]->dims()[inputs[0]->dims().size() - 1]; int width = inputs[0]->dims()[inputs[0]->dims().size() - 1];
VLOG(4) << "concat 输入尺寸: "; #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "concat input shape: ";
for (size_t i = 0; i < inputs.size(); i++) { for (size_t i = 0; i < inputs.size(); i++) {
VLOG(4) << "inputs [" << i << "]" VLOG(4) << "inputs [" << i << "]"
<< "[" << inputs[i]->dims().size() << "D]:" << "[" << inputs[i]->dims().size() << "D]:"
...@@ -132,12 +133,13 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -132,12 +133,13 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
<< inputs[i]->dims()[3]; << inputs[i]->dims()[3];
} }
VLOG(4) << "concat 输出尺寸: "; VLOG(4) << "concat output shape: ";
VLOG(4) << " out dims: " VLOG(4) << " out dims: "
<< "[" << x_dims.size() << "D]:" << x_dims[0] << " " << x_dims[1] << "[" << x_dims.size() << "D]:" << x_dims[0] << " " << x_dims[1]
<< " " << x_dims[2] << " " << x_dims[3]; << " " << x_dims[2] << " " << x_dims[3];
VLOG(4) << "axis_: " << axis_; VLOG(4) << "axis_: " << axis_;
VLOG(4) << "flag_: " << flag_; VLOG(4) << "flag_: " << flag_;
#endif
auto global_work_size = auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(x_dims[x_dims.size() - 1]), cl::NDRange{static_cast<cl::size_type>(x_dims[x_dims.size() - 1]),
...@@ -145,6 +147,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -145,6 +147,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
x_dims[x_dims.size() - 1]), x_dims[x_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["height"])}; static_cast<cl::size_type>(image_shape["height"])};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.output->target()); VLOG(4) << TargetToStr(param.output->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"]; << image_shape["height"];
...@@ -157,6 +160,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -157,6 +160,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "global_work_size: " << x_dims[x_dims.size() - 1] << " " VLOG(4) << "global_work_size: " << x_dims[x_dims.size() - 1] << " "
<< (image_shape["width"] / x_dims[x_dims.size() - 1]) << " " << (image_shape["width"] / x_dims[x_dims.size() - 1]) << " "
<< (image_shape["height"]); << (image_shape["height"]);
#endif
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int out_w = x_dims[x_dims.size() - 1]; int out_w = x_dims[x_dims.size() - 1];
...@@ -198,8 +202,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -198,8 +202,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
image_shape = InitImageDimInfoWith(in_dims); image_shape = InitImageDimInfoWith(in_dims);
auto* x_buf = inputs[i]->data<half_t, cl::Image2D>(); auto* x_buf = inputs[i]->data<half_t, cl::Image2D>();
int in_w = in_dims[in_dims.size() - 1]; int in_w = in_dims[in_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"]; << image_shape["height"];
#endif
global_work_size = global_work_size =
cl::NDRange{static_cast<cl::size_type>(in_dims[in_dims.size() - 1]), cl::NDRange{static_cast<cl::size_type>(in_dims[in_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["width"] / static_cast<cl::size_type>(image_shape["width"] /
......
...@@ -78,6 +78,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -78,6 +78,7 @@ void ConvImageCompute::PrepareForRun() {
VLOG(3) << "dilation_equal:" << dilation_equal; VLOG(3) << "dilation_equal:" << dilation_equal;
VLOG(3) << "padding :" << paddings[0] << " " << paddings[1] << " " VLOG(3) << "padding :" << paddings[0] << " " << paddings[1] << " "
<< paddings[2] << " " << paddings[3]; << paddings[2] << " " << paddings[3];
CHECK(pad_equal && stride_equal && dilation_equal); CHECK(pad_equal && stride_equal && dilation_equal);
if (kernel_h == 1 && kernel_w == 1) { if (kernel_h == 1 && kernel_w == 1) {
...@@ -269,6 +270,7 @@ void ConvImageCompute::Conv2d1x1() { ...@@ -269,6 +270,7 @@ void ConvImageCompute::Conv2d1x1() {
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d_1x1 params ============"; VLOG(4) << "============ conv2d_1x1 params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"]; << input_image_shape["height"];
...@@ -290,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() { ...@@ -290,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
CHECK_GE(input_dims.size(), 4); CHECK_GE(input_dims.size(), 4);
...@@ -313,10 +315,12 @@ void ConvImageCompute::Conv2d1x1() { ...@@ -313,10 +315,12 @@ void ConvImageCompute::Conv2d1x1() {
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int maped_w = maptofactor(w, 4); int maped_w = maptofactor(w, 4);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "maped_w: " << maped_w; VLOG(4) << "maped_w: " << maped_w;
VLOG(4) << "hasbias: " << has_bias; VLOG(4) << "hasbias: " << has_bias;
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -363,21 +367,27 @@ void ConvImageCompute::Conv2d1x1() { ...@@ -363,21 +367,27 @@ void ConvImageCompute::Conv2d1x1() {
static_cast<size_t>(maped_w), static_cast<size_t>(maped_w),
static_cast<size_t>(default_work_size.data()[2])}; static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
size_t max_work_group_size = 0; size_t max_work_group_size = 0;
kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(), kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(),
CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size); &max_work_group_size);
cl::NDRange local_work_size = cl::NullRange; cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size; VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) { if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size, local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size); max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << "," VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}"; << local_work_size[1] << "," << local_work_size[2] << "}";
#endif
} }
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
...@@ -453,6 +463,7 @@ void ConvImageCompute::Conv2d3x3() { ...@@ -453,6 +463,7 @@ void ConvImageCompute::Conv2d3x3() {
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============"; VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"]; << input_image_shape["height"];
...@@ -477,6 +488,7 @@ void ConvImageCompute::Conv2d3x3() { ...@@ -477,6 +488,7 @@ void ConvImageCompute::Conv2d3x3() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
...@@ -496,9 +508,12 @@ void ConvImageCompute::Conv2d3x3() { ...@@ -496,9 +508,12 @@ void ConvImageCompute::Conv2d3x3() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w; VLOG(4) << "w: " << w;
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -513,7 +528,9 @@ void ConvImageCompute::Conv2d3x3() { ...@@ -513,7 +528,9 @@ void ConvImageCompute::Conv2d3x3() {
status = kernel.setArg(++arg_idx, *filter_image); status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
if (has_bias) { if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -553,9 +570,11 @@ void ConvImageCompute::Conv2d3x3() { ...@@ -553,9 +570,11 @@ void ConvImageCompute::Conv2d3x3() {
static_cast<size_t>(default_work_size.data()[1]), static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])}; static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
...@@ -611,8 +630,9 @@ void ConvImageCompute::Conv2d3x3opt() { ...@@ -611,8 +630,9 @@ void ConvImageCompute::Conv2d3x3opt() {
int h_blk_size = 1; int h_blk_size = 1;
int h_blk = (nh + h_blk_size - 1) / h_blk_size; int h_blk = (nh + h_blk_size - 1) / h_blk_size;
// default_work_size[2] = h_blk; // default_work_size[2] = h_blk;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============"; VLOG(4) << "============ conv2d params ============";
// VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," // VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
// << input_image_shape["height"]; // << input_image_shape["height"];
...@@ -632,6 +652,7 @@ void ConvImageCompute::Conv2d3x3opt() { ...@@ -632,6 +652,7 @@ void ConvImageCompute::Conv2d3x3opt() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
...@@ -651,8 +672,11 @@ void ConvImageCompute::Conv2d3x3opt() { ...@@ -651,8 +672,11 @@ void ConvImageCompute::Conv2d3x3opt() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -667,7 +691,9 @@ void ConvImageCompute::Conv2d3x3opt() { ...@@ -667,7 +691,9 @@ void ConvImageCompute::Conv2d3x3opt() {
status = kernel.setArg(++arg_idx, *filter_image); status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
if (has_bias) { if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -696,22 +722,27 @@ void ConvImageCompute::Conv2d3x3opt() { ...@@ -696,22 +722,27 @@ void ConvImageCompute::Conv2d3x3opt() {
cl::NDRange{static_cast<size_t>(default_work_size.data()[0]), cl::NDRange{static_cast<size_t>(default_work_size.data()[0]),
static_cast<size_t>(w_blk), static_cast<size_t>(w_blk),
static_cast<size_t>(h_blk)}; static_cast<size_t>(h_blk)};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
size_t max_work_group_size = 0; size_t max_work_group_size = 0;
kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(), kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(),
CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size); &max_work_group_size);
cl::NDRange local_work_size = cl::NullRange; cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size; VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) { if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size, local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size); max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << "," VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}"; << local_work_size[1] << "," << local_work_size[2] << "}";
#endif
} }
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
...@@ -767,6 +798,7 @@ void ConvImageCompute::Conv2d5x5() { ...@@ -767,6 +798,7 @@ void ConvImageCompute::Conv2d5x5() {
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============"; VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"]; << input_image_shape["height"];
...@@ -789,6 +821,7 @@ void ConvImageCompute::Conv2d5x5() { ...@@ -789,6 +821,7 @@ void ConvImageCompute::Conv2d5x5() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
...@@ -808,9 +841,12 @@ void ConvImageCompute::Conv2d5x5() { ...@@ -808,9 +841,12 @@ void ConvImageCompute::Conv2d5x5() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w; VLOG(4) << "w: " << w;
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -825,7 +861,9 @@ void ConvImageCompute::Conv2d5x5() { ...@@ -825,7 +861,9 @@ void ConvImageCompute::Conv2d5x5() {
status = kernel.setArg(++arg_idx, *filter_image); status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
if (has_bias) { if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -855,9 +893,11 @@ void ConvImageCompute::Conv2d5x5() { ...@@ -855,9 +893,11 @@ void ConvImageCompute::Conv2d5x5() {
static_cast<size_t>(default_work_size.data()[1]), static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])}; static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
...@@ -912,6 +952,7 @@ void ConvImageCompute::Conv2d7x7() { ...@@ -912,6 +952,7 @@ void ConvImageCompute::Conv2d7x7() {
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============"; VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"]; << input_image_shape["height"];
...@@ -934,6 +975,7 @@ void ConvImageCompute::Conv2d7x7() { ...@@ -934,6 +975,7 @@ void ConvImageCompute::Conv2d7x7() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
...@@ -953,9 +995,12 @@ void ConvImageCompute::Conv2d7x7() { ...@@ -953,9 +995,12 @@ void ConvImageCompute::Conv2d7x7() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w; VLOG(4) << "w: " << w;
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -970,7 +1015,9 @@ void ConvImageCompute::Conv2d7x7() { ...@@ -970,7 +1015,9 @@ void ConvImageCompute::Conv2d7x7() {
status = kernel.setArg(++arg_idx, *filter_image); status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
if (has_bias) { if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -1000,9 +1047,11 @@ void ConvImageCompute::Conv2d7x7() { ...@@ -1000,9 +1047,11 @@ void ConvImageCompute::Conv2d7x7() {
static_cast<size_t>(default_work_size.data()[1]), static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])}; static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
...@@ -1071,7 +1120,9 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() { ...@@ -1071,7 +1120,9 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -1099,12 +1150,16 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() { ...@@ -1099,12 +1150,16 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size); &max_work_group_size);
cl::NDRange local_work_size = cl::NullRange; cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size; VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) { if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size, local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size); max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << "," VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}"; << local_work_size[1] << "," << local_work_size[2] << "}";
#endif
} }
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
...@@ -1153,6 +1208,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() { ...@@ -1153,6 +1208,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
int nh = output_dims[0] * output_dims[2]; int nh = output_dims[0] * output_dims[2];
auto global_work_size = cl::NDRange(c_block, w, nh); auto global_work_size = cl::NDRange(c_block, w, nh);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "setArg"; VLOG(4) << "setArg";
VLOG(4) << "c_block = " << c_block; VLOG(4) << "c_block = " << c_block;
VLOG(4) << "w = " << w; VLOG(4) << "w = " << w;
...@@ -1166,6 +1222,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() { ...@@ -1166,6 +1222,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
VLOG(4) << "x_dims[2] = " << x_dims[2]; VLOG(4) << "x_dims[2] = " << x_dims[2];
VLOG(4) << "output_dims[3] = " << output_dims[3]; VLOG(4) << "output_dims[3] = " << output_dims[3];
VLOG(4) << "output_dims[2] = " << output_dims[2]; VLOG(4) << "output_dims[2] = " << output_dims[2];
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -1185,7 +1242,9 @@ void ConvImageCompute::DepthwiseConv2d3x3() { ...@@ -1185,7 +1242,9 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -1261,6 +1320,7 @@ void ConvImageCompute::DepthwiseConv2d() { ...@@ -1261,6 +1320,7 @@ void ConvImageCompute::DepthwiseConv2d() {
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ depthwise conv2d params ============"; VLOG(4) << "============ depthwise conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << "," VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"]; << input_image_shape["height"];
...@@ -1282,6 +1342,7 @@ void ConvImageCompute::DepthwiseConv2d() { ...@@ -1282,6 +1342,7 @@ void ConvImageCompute::DepthwiseConv2d() {
VLOG(4) << "default work size{c_block, w, nh}: " VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << "" << "{" << c_block << ", " << w << ", " << nh << ""
<< "}"; << "}";
#endif
CHECK_GE(dilations.size(), 2); CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]); CHECK(dilations[0] == dilations[1]);
...@@ -1303,9 +1364,12 @@ void ConvImageCompute::DepthwiseConv2d() { ...@@ -1303,9 +1364,12 @@ void ConvImageCompute::DepthwiseConv2d() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w; VLOG(4) << "w: " << w;
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
...@@ -1320,7 +1384,9 @@ void ConvImageCompute::DepthwiseConv2d() { ...@@ -1320,7 +1384,9 @@ void ConvImageCompute::DepthwiseConv2d() {
status = kernel.setArg(++arg_idx, *filter_image); status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
if (has_bias) { if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image); status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} }
...@@ -1354,9 +1420,11 @@ void ConvImageCompute::DepthwiseConv2d() { ...@@ -1354,9 +1420,11 @@ void ConvImageCompute::DepthwiseConv2d() {
static_cast<size_t>(default_work_size.data()[1]), static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])}; static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image; // VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << "," VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}"; << global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
......
...@@ -41,9 +41,11 @@ void ElementwiseAddCompute::Run() { ...@@ -41,9 +41,11 @@ void ElementwiseAddCompute::Run() {
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(ele_param_->X->target()); VLOG(4) << TargetToStr(ele_param_->X->target());
VLOG(4) << TargetToStr(ele_param_->Y->target()); VLOG(4) << TargetToStr(ele_param_->Y->target());
VLOG(4) << TargetToStr(ele_param_->Out->target()); VLOG(4) << TargetToStr(ele_param_->Out->target());
#endif
int arg_idx = 0; int arg_idx = 0;
cl_int status = kernel.setArg(arg_idx, *x_buf); cl_int status = kernel.setArg(arg_idx, *x_buf);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
...@@ -87,10 +89,12 @@ void ElementwiseAddCompute::UpdateParams() { ...@@ -87,10 +89,12 @@ void ElementwiseAddCompute::UpdateParams() {
for (int i = static_cast<int>(y_dims.size() + axis); i < x_dims.size(); ++i) { for (int i = static_cast<int>(y_dims.size() + axis); i < x_dims.size(); ++i) {
num_ *= x_dims[i]; num_ *= x_dims[i];
} }
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "axis: " << axis; VLOG(4) << "axis: " << axis;
VLOG(4) << "batch: " << batch_; VLOG(4) << "batch: " << batch_;
VLOG(4) << "channels: " << channels_; VLOG(4) << "channels: " << channels_;
VLOG(4) << "num: " << num_; VLOG(4) << "num: " << num_;
#endif
} }
} // namespace opencl } // namespace opencl
......
...@@ -62,6 +62,7 @@ void ElementwiseAddImageCompute::Run() { ...@@ -62,6 +62,7 @@ void ElementwiseAddImageCompute::Run() {
auto* out = ele_param_->Out; auto* out = ele_param_->Out;
auto axis = ele_param_->axis; auto axis = ele_param_->axis;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target()); VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
...@@ -69,6 +70,7 @@ void ElementwiseAddImageCompute::Run() { ...@@ -69,6 +70,7 @@ void ElementwiseAddImageCompute::Run() {
VLOG(4) << "y->dims():" << y->dims(); VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims(); VLOG(4) << "out->dims():" << out->dims();
VLOG(4) << "axis:" << axis; VLOG(4) << "axis:" << axis;
#endif
paddle::lite::CLImageConverterDefault default_convertor; paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h
...@@ -83,10 +85,12 @@ void ElementwiseAddImageCompute::Run() { ...@@ -83,10 +85,12 @@ void ElementwiseAddImageCompute::Run() {
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0], auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]); out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height; VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1]; VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " " VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1]; << out_img_shape[1];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -104,8 +108,9 @@ void ElementwiseAddImageCompute::Run() { ...@@ -104,8 +108,9 @@ void ElementwiseAddImageCompute::Run() {
} else if (y_dims.size() == 1) { } else if (y_dims.size() == 1) {
if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) { if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) {
int tensor_w = x->dims()[x->dims().size() - 1]; int tensor_w = x->dims()[x->dims().size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w; VLOG(4) << "tensor_w:" << tensor_w;
#endif
cl_int status = kernel.setArg(arg_idx, *x_img); cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *y_img); status = kernel.setArg(++arg_idx, *y_img);
...@@ -127,7 +132,9 @@ void ElementwiseAddImageCompute::Run() { ...@@ -127,7 +132,9 @@ void ElementwiseAddImageCompute::Run() {
auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width), auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)}; static_cast<cl::size_type>(x_img_height)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
cl::NullRange, cl::NullRange,
......
...@@ -80,12 +80,14 @@ class ElementwiseMulImageCompute ...@@ -80,12 +80,14 @@ class ElementwiseMulImageCompute
auto* y = ele_param_->Y; auto* y = ele_param_->Y;
auto* out = ele_param_->Out; auto* out = ele_param_->Out;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target()); VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << x->dims(); VLOG(4) << "x->dims():" << x->dims();
VLOG(4) << "y->dims():" << y->dims(); VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims(); VLOG(4) << "out->dims():" << out->dims();
#endif
paddle::lite::CLImageConverterDefault default_convertor; paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape = auto x_img_shape =
...@@ -101,10 +103,12 @@ class ElementwiseMulImageCompute ...@@ -101,10 +103,12 @@ class ElementwiseMulImageCompute
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0], auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]); out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height; VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1]; VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " " VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1]; << out_img_shape[1];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -123,7 +127,9 @@ class ElementwiseMulImageCompute ...@@ -123,7 +127,9 @@ class ElementwiseMulImageCompute
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
} else if (y_dims.size() == 1 || y_dims.size() == 4) { } else if (y_dims.size() == 1 || y_dims.size() == 4) {
auto tensor_w = x_dims[x_dims.size() - 1]; auto tensor_w = x_dims[x_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w; VLOG(4) << "tensor_w:" << tensor_w;
#endif
// kernel: channel_mul_d1 / channel_mul_d4 // kernel: channel_mul_d1 / channel_mul_d4
cl_int status = kernel.setArg(arg_idx, *x_img); cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
...@@ -136,7 +142,9 @@ class ElementwiseMulImageCompute ...@@ -136,7 +142,9 @@ class ElementwiseMulImageCompute
} else if (y_dims.size() == 2) { } else if (y_dims.size() == 2) {
if (x_dims[0] == y_dims[0] && x_dims[1] == y_dims[1]) { if (x_dims[0] == y_dims[0] && x_dims[1] == y_dims[1]) {
auto tensor_w = x_dims[x_dims.size() - 1]; auto tensor_w = x_dims[x_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w; VLOG(4) << "tensor_w:" << tensor_w;
#endif
// kernel: channel_mul_d2_nc // kernel: channel_mul_d2_nc
cl_int status = kernel.setArg(arg_idx, *x_img); cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
...@@ -149,7 +157,9 @@ class ElementwiseMulImageCompute ...@@ -149,7 +157,9 @@ class ElementwiseMulImageCompute
} else { } else {
auto y_tensor_h = y->dims()[0]; auto y_tensor_h = y->dims()[0];
auto y_tensor_w = y->dims()[1]; auto y_tensor_w = y->dims()[1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "y_tensor_w:" << y_tensor_w << " y_tensor_h:" << y_tensor_h; VLOG(4) << "y_tensor_w:" << y_tensor_w << " y_tensor_h:" << y_tensor_h;
#endif
// kernel: channel_mul_d2_hw // kernel: channel_mul_d2_hw
cl_int status = kernel.setArg(arg_idx, *x_img); cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
...@@ -179,8 +189,9 @@ class ElementwiseMulImageCompute ...@@ -179,8 +189,9 @@ class ElementwiseMulImageCompute
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_); context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
} }
protected: protected:
......
...@@ -62,6 +62,7 @@ void ElementwiseSubImageCompute::Run() { ...@@ -62,6 +62,7 @@ void ElementwiseSubImageCompute::Run() {
auto* out = ele_param_->Out; auto* out = ele_param_->Out;
auto axis = ele_param_->axis; auto axis = ele_param_->axis;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target()); VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
...@@ -69,6 +70,7 @@ void ElementwiseSubImageCompute::Run() { ...@@ -69,6 +70,7 @@ void ElementwiseSubImageCompute::Run() {
VLOG(4) << "y->dims():" << y->dims(); VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims(); VLOG(4) << "out->dims():" << out->dims();
VLOG(4) << "axis:" << axis; VLOG(4) << "axis:" << axis;
#endif
paddle::lite::CLImageConverterDefault default_convertor; paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h
...@@ -83,10 +85,12 @@ void ElementwiseSubImageCompute::Run() { ...@@ -83,10 +85,12 @@ void ElementwiseSubImageCompute::Run() {
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0], auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]); out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height; VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1]; VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " " VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1]; << out_img_shape[1];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -104,8 +108,9 @@ void ElementwiseSubImageCompute::Run() { ...@@ -104,8 +108,9 @@ void ElementwiseSubImageCompute::Run() {
} else if (y_dims.size() == 1) { } else if (y_dims.size() == 1) {
if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) { if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) {
int tensor_w = x->dims()[x->dims().size() - 1]; int tensor_w = x->dims()[x->dims().size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w; VLOG(4) << "tensor_w:" << tensor_w;
#endif
cl_int status = kernel.setArg(arg_idx, *x_img); cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *y_img); status = kernel.setArg(++arg_idx, *y_img);
...@@ -127,7 +132,10 @@ void ElementwiseSubImageCompute::Run() { ...@@ -127,7 +132,10 @@ void ElementwiseSubImageCompute::Run() {
auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width), auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)}; static_cast<cl::size_type>(x_img_height)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
cl::NullRange, cl::NullRange,
......
...@@ -57,10 +57,12 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -57,10 +57,12 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
auto out_dims = out->dims(); auto out_dims = out->dims();
auto in_dims = x->dims(); auto in_dims = x->dims();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims; VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims; VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims); auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>(); auto* x_img = x->data<half_t, cl::Image2D>();
...@@ -71,10 +73,11 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -71,10 +73,11 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>( auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image" << out_img; // VLOG(4) << "out_image" << out_img;
VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " " VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
...@@ -87,8 +90,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -87,8 +90,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]), static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])})); static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", " VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2]; << default_work_size[1] << ", " << default_work_size[2];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img); cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *grid_img); status = kernel.setArg(arg_idx++, *grid_img);
...@@ -114,9 +119,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -114,9 +119,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_); context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2]; << global_work_size[1] << " " << global_work_size[2];
#endif
} }
protected: protected:
......
...@@ -89,19 +89,23 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -89,19 +89,23 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
int in_h = in_dims[2]; int in_h = in_dims[2];
int in_w = in_dims[3]; int in_w = in_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims; VLOG(4) << "x->dims():" << in_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(in_dims); auto out_image_shape = InitImageDimInfoWith(in_dims);
auto* x_img = x->data<half_t, cl::Image2D>(); auto* x_img = x->data<half_t, cl::Image2D>();
auto* out_img = out->mutable_data<half_t, cl::Image2D>( auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " " VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w; VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
#endif
int threads = 512; int threads = 512;
int group_size_x = (channel + 3) / 4; int group_size_x = (channel + 3) / 4;
...@@ -113,10 +117,13 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -113,10 +117,13 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
cl::NDRange{static_cast<cl::size_type>(group_size_x * threads), cl::NDRange{static_cast<cl::size_type>(group_size_x * threads),
static_cast<cl::size_type>(group_size_y), static_cast<cl::size_type>(group_size_y),
static_cast<cl::size_type>(1)}; static_cast<cl::size_type>(1)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size:[2D]:" << local_work_size[0] << " " VLOG(4) << "local_work_size:[2D]:" << local_work_size[0] << " "
<< local_work_size[1] << " " << local_work_size[2]; << local_work_size[1] << " " << local_work_size[2];
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2]; << global_work_size[1] << " " << global_work_size[2];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
......
...@@ -42,11 +42,13 @@ class IoCopyHostToOpenCLCompute ...@@ -42,11 +42,13 @@ class IoCopyHostToOpenCLCompute
CHECK(param.x->target() == TARGET(kHost) || CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kARM)); param.x->target() == TARGET(kARM));
auto mem_size = param.x->memory_size(); auto mem_size = param.x->memory_size();
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.x->memory_size():" << mem_size; VLOG(2) << "param.x->memory_size():" << mem_size;
VLOG(2) << "param.x->dims().size():" << param.x->dims().size(); VLOG(2) << "param.x->dims().size():" << param.x->dims().size();
VLOG(2) << "param.x->dims():" << param.x->dims(); VLOG(2) << "param.x->dims():" << param.x->dims();
VLOG(2) << "param.y->dims().size():" << param.y->dims().size(); VLOG(2) << "param.y->dims().size():" << param.y->dims().size();
VLOG(2) << "param.y->dims():" << param.y->dims(); VLOG(2) << "param.y->dims():" << param.y->dims();
#endif
auto* data = param.y->mutable_data(TARGET(kOpenCL), mem_size); auto* data = param.y->mutable_data(TARGET(kOpenCL), mem_size);
CopyFromHostSync(data, param.x->raw_data(), mem_size); CopyFromHostSync(data, param.x->raw_data(), mem_size);
} }
...@@ -85,12 +87,14 @@ class IoCopykOpenCLToHostCompute ...@@ -85,12 +87,14 @@ class IoCopykOpenCLToHostCompute
CHECK(param.x->target() == TARGET(kOpenCL)); CHECK(param.x->target() == TARGET(kOpenCL));
auto mem_size = param.x->memory_size(); auto mem_size = param.x->memory_size();
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "copy size " << mem_size; VLOG(2) << "copy size " << mem_size;
VLOG(2) << "param.x->dims().size():" << param.x->dims().size(); VLOG(2) << "param.x->dims().size():" << param.x->dims().size();
VLOG(2) << "param.x->dims():" << param.x->dims(); VLOG(2) << "param.x->dims():" << param.x->dims();
VLOG(2) << "param.y->dims().size():" << param.y->dims().size(); VLOG(2) << "param.y->dims().size():" << param.y->dims().size();
VLOG(2) << "param.y->dims():" << param.y->dims(); VLOG(2) << "param.y->dims():" << param.y->dims();
VLOG(2) << "param.process_type:" << param.process_type; VLOG(2) << "param.process_type:" << param.process_type;
#endif
auto* data = param.y->mutable_data(TARGET(kHost), mem_size); auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
const cl::Buffer* x_ptr; const cl::Buffer* x_ptr;
...@@ -104,7 +108,9 @@ class IoCopykOpenCLToHostCompute ...@@ -104,7 +108,9 @@ class IoCopykOpenCLToHostCompute
auto* wait_list = context.cl_wait_list(); auto* wait_list = context.cl_wait_list();
auto it = wait_list->find(x_ptr); auto it = wait_list->find(x_ptr);
if (it != wait_list->end()) { if (it != wait_list->end()) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "--- Find the sync event for the target cl tensor. ---"; VLOG(2) << "--- Find the sync event for the target cl tensor. ---";
#endif
auto& event = *(it->second); auto& event = *(it->second);
event.wait(); event.wait();
} else { } else {
......
...@@ -74,6 +74,7 @@ class LayoutComputeBufferChwToImageDefault ...@@ -74,6 +74,7 @@ class LayoutComputeBufferChwToImageDefault
const int Stride1 = out_H * out_W; const int Stride1 = out_H * out_W;
const int Stride0 = out_W; const int Stride0 = out_W;
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.process_type:" << param.process_type; VLOG(2) << "param.process_type:" << param.process_type;
VLOG(2) << "x_dims:" << x_dims; VLOG(2) << "x_dims:" << x_dims;
VLOG(2) << "param.x->memory_size():" << param.x->memory_size(); VLOG(2) << "param.x->memory_size():" << param.x->memory_size();
...@@ -89,6 +90,7 @@ class LayoutComputeBufferChwToImageDefault ...@@ -89,6 +90,7 @@ class LayoutComputeBufferChwToImageDefault
VLOG(2) << "Stride2:" << Stride2; VLOG(2) << "Stride2:" << Stride2;
VLOG(2) << "Stride1:" << Stride1; VLOG(2) << "Stride1:" << Stride1;
VLOG(2) << "Stride0:" << Stride0; VLOG(2) << "Stride0:" << Stride0;
#endif
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
...@@ -177,6 +179,7 @@ class LayoutComputeImageDefaultToBufferChw ...@@ -177,6 +179,7 @@ class LayoutComputeImageDefaultToBufferChw
new_dims[4 - x_dims.size() + j] = x_dims[j]; new_dims[4 - x_dims.size() + j] = x_dims[j];
} }
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.process_type:" << param.process_type; VLOG(2) << "param.process_type:" << param.process_type;
VLOG(2) << "x_dims:" << x_dims; VLOG(2) << "x_dims:" << x_dims;
VLOG(2) << "param.x->memory_size():" << param.x->memory_size(); VLOG(2) << "param.x->memory_size():" << param.x->memory_size();
...@@ -186,6 +189,7 @@ class LayoutComputeImageDefaultToBufferChw ...@@ -186,6 +189,7 @@ class LayoutComputeImageDefaultToBufferChw
<< new_dims[1] << " " << new_dims[2] << " " << new_dims[3]; << new_dims[1] << " " << new_dims[2] << " " << new_dims[3];
VLOG(2) << "y_dims:" << y_dims; VLOG(2) << "y_dims:" << y_dims;
VLOG(2) << "param.y->memory_size():" << param.y->memory_size(); VLOG(2) << "param.y->memory_size():" << param.y->memory_size();
#endif
size_t C = new_dims[1]; size_t C = new_dims[1];
size_t in_height = new_dims[2]; size_t in_height = new_dims[2];
...@@ -217,8 +221,10 @@ class LayoutComputeImageDefaultToBufferChw ...@@ -217,8 +221,10 @@ class LayoutComputeImageDefaultToBufferChw
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(C)); status = kernel.setArg(++arg_idx, static_cast<const int>(C));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "gws:[3D]" << ((new_dims[1] + 3) / 4) << " " << new_dims[3] VLOG(2) << "gws:[3D]" << ((new_dims[1] + 3) / 4) << " " << new_dims[3]
<< " " << (new_dims[0] * new_dims[2]); << " " << (new_dims[0] * new_dims[2]);
#endif
auto global_work_size = auto global_work_size =
cl::NDRange{static_cast<cl::size_type>((new_dims[1] + 3) / 4), cl::NDRange{static_cast<cl::size_type>((new_dims[1] + 3) / 4),
static_cast<cl::size_type>(new_dims[3]), static_cast<cl::size_type>(new_dims[3]),
......
...@@ -65,6 +65,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -65,6 +65,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
auto out_dims = out->dims(); auto out_dims = out->dims();
auto in_dims = x->dims(); auto in_dims = x->dims();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target(): " << TargetToStr(x->target()); VLOG(4) << "x->target(): " << TargetToStr(x->target());
VLOG(4) << "out->target(): " << TargetToStr(out->target()); VLOG(4) << "out->target(): " << TargetToStr(out->target());
VLOG(4) << "x->dims(): " << in_dims; VLOG(4) << "x->dims(): " << in_dims;
...@@ -74,6 +75,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -74,6 +75,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "alpha: " << alpha_; VLOG(4) << "alpha: " << alpha_;
VLOG(4) << "beta: " << beta_; VLOG(4) << "beta: " << beta_;
VLOG(4) << "norm_region: " << norm_region_; VLOG(4) << "norm_region: " << norm_region_;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims); auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>(); auto* x_img = x->data<half_t, cl::Image2D>();
...@@ -81,9 +83,12 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -81,9 +83,12 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>( auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image" << out_img; // VLOG(4) << "out_image" << out_img;
VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " " VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -97,8 +102,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -97,8 +102,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]), static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])})); static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
    // Debug-only dump of the work-size components. The final index is 2, not
    // 3: the other kernels read this vector only at [0], [1] and [2], so [3]
    // would index past the last entry (assumes DefaultWorkSize yields exactly
    // three entries -- TODO confirm against its definition).
    VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
            << default_work_size[1] << ", " << default_work_size[2];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img); cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_img); status = kernel.setArg(arg_idx++, *out_img);
...@@ -130,9 +137,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -130,9 +137,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_); context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2]; << global_work_size[1] << " " << global_work_size[2];
#endif
} }
protected: protected:
......
...@@ -87,6 +87,7 @@ class NearestInterpComputeImageDefault ...@@ -87,6 +87,7 @@ class NearestInterpComputeImageDefault
status = kernel.setArg(++arg_idx, static_cast<const int>(out_dims_w)); status = kernel.setArg(++arg_idx, static_cast<const int>(out_dims_w));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target()); VLOG(4) << TargetToStr(param.Out->target());
VLOG(4) << "out_image_shape(w,h):" << out_image_shape["width"] << " " VLOG(4) << "out_image_shape(w,h):" << out_image_shape["width"] << " "
...@@ -95,6 +96,7 @@ class NearestInterpComputeImageDefault ...@@ -95,6 +96,7 @@ class NearestInterpComputeImageDefault
<< x_dims[1] << " " << x_dims[2] << " " << x_dims[3]; << x_dims[1] << " " << x_dims[2] << " " << x_dims[3];
VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " " VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " "
<< y_dims[1] << " " << y_dims[2] << " " << y_dims[3]; << y_dims[1] << " " << y_dims[2] << " " << y_dims[3];
#endif
const std::vector<size_t>& default_work_size = const std::vector<size_t>& default_work_size =
DefaultWorkSize(y_dims, DefaultWorkSize(y_dims,
......
...@@ -71,10 +71,12 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -71,10 +71,12 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
int out_h = out_dims[2]; int out_h = out_dims[2];
int out_w = out_dims[3]; int out_w = out_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target()); VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target()); VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims; VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims; VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims); auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>(); auto* x_img = x->data<half_t, cl::Image2D>();
...@@ -82,11 +84,13 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -82,11 +84,13 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>( auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " " VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w; VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w; VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w;
#endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
...@@ -98,9 +102,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -98,9 +102,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]), static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])})); static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", " VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2]; << default_work_size[1] << ", " << default_work_size[2];
#endif
int pad_h0 = pad2d_param_->paddings[0]; int pad_h0 = pad2d_param_->paddings[0];
int pad_h1 = pad2d_param_->paddings[1]; int pad_h1 = pad2d_param_->paddings[1];
int pad_w0 = pad2d_param_->paddings[2]; int pad_w0 = pad2d_param_->paddings[2];
...@@ -144,9 +149,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -144,9 +149,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_); context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2]; << global_work_size[1] << " " << global_work_size[2];
#endif
} }
protected: protected:
......
...@@ -59,10 +59,14 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -59,10 +59,14 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::vector<int> paddings = *param.paddings; std::vector<int> paddings = *param.paddings;
std::vector<int> strides = param.strides; std::vector<int> strides = param.strides;
std::vector<int> ksize = param.ksize; std::vector<int> ksize = param.ksize;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_pooling: " << global_pooling; VLOG(4) << "global_pooling: " << global_pooling;
VLOG(4) << "pooling_type: " << pooling_type; VLOG(4) << "pooling_type: " << pooling_type;
VLOG(4) << "paddings : " << paddings[0] << " " << paddings[1] << " " VLOG(4) << "paddings : " << paddings[0] << " " << paddings[1] << " "
<< paddings[2] << " " << paddings[3] << " "; << paddings[2] << " " << paddings[3] << " ";
#endif
if (global_pooling) { if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
paddings[2 * i] = 0; paddings[2 * i] = 0;
...@@ -70,6 +74,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -70,6 +74,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
ksize[i] = static_cast<int>(in_dims[i + 2]); ksize[i] = static_cast<int>(in_dims[i + 2]);
} }
} }
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "in_dims : [" << in_dims.size() << "]" << in_dims[0] << " " VLOG(4) << "in_dims : [" << in_dims.size() << "]" << in_dims[0] << " "
<< in_dims[1] << " " << in_dims[2] << " " << in_dims[3]; << in_dims[1] << " " << in_dims[2] << " " << in_dims[3];
VLOG(4) << "out_dims : [" << out_dims.size() << "]" << out_dims[0] << " " VLOG(4) << "out_dims : [" << out_dims.size() << "]" << out_dims[0] << " "
...@@ -82,6 +88,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -82,6 +88,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
<< ksize[1] << " " << ksize[2] << " " << ksize[3]; << ksize[1] << " " << ksize[2] << " " << ksize[3];
VLOG(4) << "paddings : [" << paddings.size() << "]" << paddings[0] << " " VLOG(4) << "paddings : [" << paddings.size() << "]" << paddings[0] << " "
<< paddings[1] << " " << paddings[2] << " " << paddings[3]; << paddings[1] << " " << paddings[2] << " " << paddings[3];
#endif
bool pads_equal = bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]); (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
if (!pads_equal) { if (!pads_equal) {
...@@ -95,8 +103,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -95,8 +103,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
// VLOG(4) << "x_image" << x_img; // VLOG(4) << "x_image" << x_img;
auto out_image_shape = InitImageDimInfoWith(out_dims); auto out_image_shape = InitImageDimInfoWith(out_dims);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape = " << out_image_shape["width"] << " " VLOG(4) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"]; << out_image_shape["height"];
#endif
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>( auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
// VLOG(4) << "out_image" << out_img; // VLOG(4) << "out_image" << out_img;
...@@ -109,8 +119,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -109,8 +119,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
int w = out_dims[3]; int w = out_dims[3];
int nh = out_dims[0] * out_dims[2]; int nh = out_dims[0] * out_dims[2];
auto global_work_size = cl::NDRange(c_block, w, nh); auto global_work_size = cl::NDRange(c_block, w, nh);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size : [" << 3 << "]" << c_block << " " << w VLOG(4) << "global_work_size : [" << 3 << "]" << c_block << " " << w
<< " " << nh << " "; << " " << nh << " ";
#endif
cl_int status; cl_int status;
int arg_idx = 0; int arg_idx = 0;
status = kernel.setArg(arg_idx, *x_img); status = kernel.setArg(arg_idx, *x_img);
......
...@@ -41,8 +41,6 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -41,8 +41,6 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
} }
void Run() override { void Run() override {
VLOG(4) << "reshape_compute run ... ";
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
const Tensor* const x = param.x; const Tensor* const x = param.x;
...@@ -64,8 +62,9 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -64,8 +62,9 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
InitImageDimInfoWith(out_dims); InitImageDimInfoWith(out_dims);
cl::Image2D* const out_image = output->mutable_data<half_t, cl::Image2D>( cl::Image2D* const out_image = output->mutable_data<half_t, cl::Image2D>(
out_image_shape.at("width"), out_image_shape.at("height")); out_image_shape.at("width"), out_image_shape.at("height"));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_dims= " << out_dims; VLOG(4) << "out_dims= " << out_dims;
#endif
const std::vector<size_t>& default_work_size = DefaultWorkSize( const std::vector<size_t>& default_work_size = DefaultWorkSize(
out_dims, out_dims,
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
...@@ -94,6 +93,8 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -94,6 +93,8 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
int out_Stride0 = out_W; int out_Stride0 = out_W;
int out_Stride1 = out_H * out_W; int out_Stride1 = out_H * out_W;
int out_Stride2 = out_C * out_H * out_W; int out_Stride2 = out_C * out_H * out_W;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_C=" << out_C; VLOG(4) << "out_C=" << out_C;
VLOG(4) << "out_H=" << out_H; VLOG(4) << "out_H=" << out_H;
VLOG(4) << "out_W=" << out_W; VLOG(4) << "out_W=" << out_W;
...@@ -104,17 +105,20 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -104,17 +105,20 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "in_Stride1=" << in_Stride1; VLOG(4) << "in_Stride1=" << in_Stride1;
VLOG(4) << "out_Stride0=" << out_Stride0; VLOG(4) << "out_Stride0=" << out_Stride0;
VLOG(4) << "out_Stride1=" << out_Stride1; VLOG(4) << "out_Stride1=" << out_Stride1;
#endif
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(x->target()); VLOG(4) << TargetToStr(x->target());
VLOG(4) << TargetToStr(param.output->target()); VLOG(4) << TargetToStr(param.output->target());
#endif
int arg_idx = 0; int arg_idx = 0;
cl_int status; cl_int status;
status = kernel.setArg(arg_idx, *x_image); status = kernel.setArg(arg_idx, *x_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
......
...@@ -51,8 +51,10 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -51,8 +51,10 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
// LOG(INFO) << "x_image" << x_img; // LOG(INFO) << "x_image" << x_img;
auto out_image_shape = InitImageDimInfoWith(in_dims); auto out_image_shape = InitImageDimInfoWith(in_dims);
LOG(INFO) << "out_image_shape = " << out_image_shape["width"] << " " #ifndef LITE_SHUTDOWN_LOG
<< out_image_shape["height"]; VLOG(4) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
#endif
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>( auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
// LOG(INFO) << "out_image" << out_img; // LOG(INFO) << "out_image" << out_img;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册