提交 730ca937 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] Comment vlog for opencl (#3252)


* [LITE][OPENCL] comment vlog for opencl kernel. test=develop
上级 1c5b0583
......@@ -101,6 +101,7 @@ class ActivationComputeImageDefault
status = kernel.setArg(++arg_idx, scale_);
CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
......@@ -112,6 +113,7 @@ class ActivationComputeImageDefault
VLOG(4) << "threshold:" << threshold_;
VLOG(4) << "scale:" << scale_;
VLOG(4) << "kernel func name:" << kernel_func_name_;
#endif
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(image_shape["width"]),
......
......@@ -77,17 +77,21 @@ class BilinearInterpImageCompute
int out_h = out_dims[2];
int out_w = out_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>();
// VLOG(4) << "x_image: " << x_img;
auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "x_image: " << x_img;
// VLOG(4) << "out_image: " << out_img;
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"];
......@@ -96,6 +100,7 @@ class BilinearInterpImageCompute
<< ", align_delta: " << align_delta;
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w;
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -107,8 +112,10 @@ class BilinearInterpImageCompute
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_img);
......@@ -142,9 +149,10 @@ class BilinearInterpImageCompute
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2];
#endif
}
protected:
......
......@@ -123,7 +123,8 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
int arg_idx = 0;
int width = inputs[0]->dims()[inputs[0]->dims().size() - 1];
VLOG(4) << "concat 输入尺寸: ";
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "concat input shape: ";
for (size_t i = 0; i < inputs.size(); i++) {
VLOG(4) << "inputs [" << i << "]"
<< "[" << inputs[i]->dims().size() << "D]:"
......@@ -132,12 +133,13 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
<< inputs[i]->dims()[3];
}
VLOG(4) << "concat 输出尺寸: ";
VLOG(4) << "concat output shape: ";
VLOG(4) << " out dims: "
<< "[" << x_dims.size() << "D]:" << x_dims[0] << " " << x_dims[1]
<< " " << x_dims[2] << " " << x_dims[3];
VLOG(4) << "axis_: " << axis_;
VLOG(4) << "flag_: " << flag_;
#endif
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(x_dims[x_dims.size() - 1]),
......@@ -145,6 +147,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
x_dims[x_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["height"])};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.output->target());
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"];
......@@ -157,6 +160,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "global_work_size: " << x_dims[x_dims.size() - 1] << " "
<< (image_shape["width"] / x_dims[x_dims.size() - 1]) << " "
<< (image_shape["height"]);
#endif
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int out_w = x_dims[x_dims.size() - 1];
......@@ -198,8 +202,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
image_shape = InitImageDimInfoWith(in_dims);
auto* x_buf = inputs[i]->data<half_t, cl::Image2D>();
int in_w = in_dims[in_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"];
#endif
global_work_size =
cl::NDRange{static_cast<cl::size_type>(in_dims[in_dims.size() - 1]),
static_cast<cl::size_type>(image_shape["width"] /
......
......@@ -78,6 +78,7 @@ void ConvImageCompute::PrepareForRun() {
VLOG(3) << "dilation_equal:" << dilation_equal;
VLOG(3) << "padding :" << paddings[0] << " " << paddings[1] << " "
<< paddings[2] << " " << paddings[3];
CHECK(pad_equal && stride_equal && dilation_equal);
if (kernel_h == 1 && kernel_w == 1) {
......@@ -269,6 +270,7 @@ void ConvImageCompute::Conv2d1x1() {
int w = default_work_size[1];
int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d_1x1 params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"];
......@@ -290,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
CHECK_GE(input_dims.size(), 4);
......@@ -313,10 +315,12 @@ void ConvImageCompute::Conv2d1x1() {
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int maped_w = maptofactor(w, 4);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "maped_w: " << maped_w;
VLOG(4) << "hasbias: " << has_bias;
#endif
cl_int status;
int arg_idx = 0;
......@@ -363,21 +367,27 @@ void ConvImageCompute::Conv2d1x1() {
static_cast<size_t>(maped_w),
static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
size_t max_work_group_size = 0;
kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(),
CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size);
cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}";
#endif
}
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
......@@ -453,6 +463,7 @@ void ConvImageCompute::Conv2d3x3() {
int w = default_work_size[1];
int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"];
......@@ -477,6 +488,7 @@ void ConvImageCompute::Conv2d3x3() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
......@@ -496,9 +508,12 @@ void ConvImageCompute::Conv2d3x3() {
STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w;
#endif
cl_int status;
int arg_idx = 0;
......@@ -513,7 +528,9 @@ void ConvImageCompute::Conv2d3x3() {
status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status);
if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -553,9 +570,11 @@ void ConvImageCompute::Conv2d3x3() {
static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
......@@ -611,8 +630,9 @@ void ConvImageCompute::Conv2d3x3opt() {
int h_blk_size = 1;
int h_blk = (nh + h_blk_size - 1) / h_blk_size;
// default_work_size[2] = h_blk;
// default_work_size[2] = h_blk;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============";
// VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
// << input_image_shape["height"];
......@@ -632,6 +652,7 @@ void ConvImageCompute::Conv2d3x3opt() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
......@@ -651,8 +672,11 @@ void ConvImageCompute::Conv2d3x3opt() {
STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
#endif
cl_int status;
int arg_idx = 0;
......@@ -667,7 +691,9 @@ void ConvImageCompute::Conv2d3x3opt() {
status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status);
if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -696,22 +722,27 @@ void ConvImageCompute::Conv2d3x3opt() {
cl::NDRange{static_cast<size_t>(default_work_size.data()[0]),
static_cast<size_t>(w_blk),
static_cast<size_t>(h_blk)};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
size_t max_work_group_size = 0;
kernel.getWorkGroupInfo<size_t>(CLRuntime::Global()->device(),
CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size);
cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}";
#endif
}
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
......@@ -767,6 +798,7 @@ void ConvImageCompute::Conv2d5x5() {
int w = default_work_size[1];
int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"];
......@@ -789,6 +821,7 @@ void ConvImageCompute::Conv2d5x5() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
......@@ -808,9 +841,12 @@ void ConvImageCompute::Conv2d5x5() {
STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w;
#endif
cl_int status;
int arg_idx = 0;
......@@ -825,7 +861,9 @@ void ConvImageCompute::Conv2d5x5() {
status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status);
if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -855,9 +893,11 @@ void ConvImageCompute::Conv2d5x5() {
static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
......@@ -912,6 +952,7 @@ void ConvImageCompute::Conv2d7x7() {
int w = default_work_size[1];
int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"];
......@@ -934,6 +975,7 @@ void ConvImageCompute::Conv2d7x7() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
......@@ -953,9 +995,12 @@ void ConvImageCompute::Conv2d7x7() {
STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w;
#endif
cl_int status;
int arg_idx = 0;
......@@ -970,7 +1015,9 @@ void ConvImageCompute::Conv2d7x7() {
status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status);
if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -1000,9 +1047,11 @@ void ConvImageCompute::Conv2d7x7() {
static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
......@@ -1071,7 +1120,9 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -1099,12 +1150,16 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
CL_KERNEL_WORK_GROUP_SIZE,
&max_work_group_size);
cl::NDRange local_work_size = cl::NullRange;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "max_work_group_size: " << max_work_group_size;
#endif
if (max_work_group_size > 0 && use_lws) {
local_work_size = context.cl_context()->LocalWorkSize(global_work_size,
max_work_group_size);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size[3D]: {" << local_work_size[0] << ","
<< local_work_size[1] << "," << local_work_size[2] << "}";
#endif
}
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
......@@ -1153,6 +1208,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
int nh = output_dims[0] * output_dims[2];
auto global_work_size = cl::NDRange(c_block, w, nh);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "setArg";
VLOG(4) << "c_block = " << c_block;
VLOG(4) << "w = " << w;
......@@ -1166,6 +1222,7 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
VLOG(4) << "x_dims[2] = " << x_dims[2];
VLOG(4) << "output_dims[3] = " << output_dims[3];
VLOG(4) << "output_dims[2] = " << output_dims[2];
#endif
cl_int status;
int arg_idx = 0;
......@@ -1185,7 +1242,9 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -1261,6 +1320,7 @@ void ConvImageCompute::DepthwiseConv2d() {
int w = default_work_size[1];
int nh = default_work_size[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "============ depthwise conv2d params ============";
VLOG(4) << "input_image_shape: " << input_image_shape["width"] << ","
<< input_image_shape["height"];
......@@ -1282,6 +1342,7 @@ void ConvImageCompute::DepthwiseConv2d() {
VLOG(4) << "default work size{c_block, w, nh}: "
<< "{" << c_block << ", " << w << ", " << nh << ""
<< "}";
#endif
CHECK_GE(dilations.size(), 2);
CHECK(dilations[0] == dilations[1]);
......@@ -1303,9 +1364,12 @@ void ConvImageCompute::DepthwiseConv2d() {
STL::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
VLOG(4) << "w: " << w;
#endif
cl_int status;
int arg_idx = 0;
......@@ -1320,7 +1384,9 @@ void ConvImageCompute::DepthwiseConv2d() {
status = kernel.setArg(++arg_idx, *filter_image);
CL_CHECK_FATAL(status);
if (has_bias) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: ";
#endif
status = kernel.setArg(++arg_idx, *bias_image);
CL_CHECK_FATAL(status);
}
......@@ -1354,9 +1420,11 @@ void ConvImageCompute::DepthwiseConv2d() {
static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])};
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image: " << out_image;
VLOG(4) << "global_work_size[3D]: {" << global_work_size[0] << ","
<< global_work_size[1] << "," << global_work_size[2] << "}";
#endif
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
......
......@@ -41,9 +41,11 @@ void ElementwiseAddCompute::Run() {
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(ele_param_->X->target());
VLOG(4) << TargetToStr(ele_param_->Y->target());
VLOG(4) << TargetToStr(ele_param_->Out->target());
#endif
int arg_idx = 0;
cl_int status = kernel.setArg(arg_idx, *x_buf);
CL_CHECK_FATAL(status);
......@@ -87,10 +89,12 @@ void ElementwiseAddCompute::UpdateParams() {
for (int i = static_cast<int>(y_dims.size() + axis); i < x_dims.size(); ++i) {
num_ *= x_dims[i];
}
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "axis: " << axis;
VLOG(4) << "batch: " << batch_;
VLOG(4) << "channels: " << channels_;
VLOG(4) << "num: " << num_;
#endif
}
} // namespace opencl
......
......@@ -62,6 +62,7 @@ void ElementwiseAddImageCompute::Run() {
auto* out = ele_param_->Out;
auto axis = ele_param_->axis;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
......@@ -69,6 +70,7 @@ void ElementwiseAddImageCompute::Run() {
VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims();
VLOG(4) << "axis:" << axis;
#endif
paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h
......@@ -83,10 +85,12 @@ void ElementwiseAddImageCompute::Run() {
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -104,8 +108,9 @@ void ElementwiseAddImageCompute::Run() {
} else if (y_dims.size() == 1) {
if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) {
int tensor_w = x->dims()[x->dims().size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w;
#endif
cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *y_img);
......@@ -127,7 +132,9 @@ void ElementwiseAddImageCompute::Run() {
auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -80,12 +80,14 @@ class ElementwiseMulImageCompute
auto* y = ele_param_->Y;
auto* out = ele_param_->Out;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << x->dims();
VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims();
#endif
paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape =
......@@ -101,10 +103,12 @@ class ElementwiseMulImageCompute
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -123,7 +127,9 @@ class ElementwiseMulImageCompute
CL_CHECK_FATAL(status);
} else if (y_dims.size() == 1 || y_dims.size() == 4) {
auto tensor_w = x_dims[x_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w;
#endif
// kernel: channel_mul_d1 / channel_mul_d4
cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status);
......@@ -136,7 +142,9 @@ class ElementwiseMulImageCompute
} else if (y_dims.size() == 2) {
if (x_dims[0] == y_dims[0] && x_dims[1] == y_dims[1]) {
auto tensor_w = x_dims[x_dims.size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w;
#endif
// kernel: channel_mul_d2_nc
cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status);
......@@ -149,7 +157,9 @@ class ElementwiseMulImageCompute
} else {
auto y_tensor_h = y->dims()[0];
auto y_tensor_w = y->dims()[1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "y_tensor_w:" << y_tensor_w << " y_tensor_h:" << y_tensor_h;
#endif
// kernel: channel_mul_d2_hw
cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status);
......@@ -179,8 +189,9 @@ class ElementwiseMulImageCompute
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
}
protected:
......
......@@ -62,6 +62,7 @@ void ElementwiseSubImageCompute::Run() {
auto* out = ele_param_->Out;
auto axis = ele_param_->axis;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "y->target():" << TargetToStr(y->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
......@@ -69,6 +70,7 @@ void ElementwiseSubImageCompute::Run() {
VLOG(4) << "y->dims():" << y->dims();
VLOG(4) << "out->dims():" << out->dims();
VLOG(4) << "axis:" << axis;
#endif
paddle::lite::CLImageConverterDefault default_convertor;
auto x_img_shape = default_convertor.InitImageDimInfoWith(x->dims()); // w, h
......@@ -83,10 +85,12 @@ void ElementwiseSubImageCompute::Run() {
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
VLOG(4) << "out_img_shape[w,h]:" << out_img_shape[0] << " "
<< out_img_shape[1];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -104,8 +108,9 @@ void ElementwiseSubImageCompute::Run() {
} else if (y_dims.size() == 1) {
if (axis == x->dims().size() - 1 || axis == x->dims().size() - 3) {
int tensor_w = x->dims()[x->dims().size() - 1];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "tensor_w:" << tensor_w;
#endif
cl_int status = kernel.setArg(arg_idx, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *y_img);
......@@ -127,7 +132,10 @@ void ElementwiseSubImageCompute::Run() {
auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -57,10 +57,12 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
auto out_dims = out->dims();
auto in_dims = x->dims();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>();
......@@ -71,10 +73,11 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image" << out_img;
VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " "
<< out_image_shape["height"];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
......@@ -87,8 +90,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *grid_img);
......@@ -114,9 +119,10 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2];
#endif
}
protected:
......
......@@ -89,19 +89,23 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
int in_h = in_dims[2];
int in_w = in_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(in_dims);
auto* x_img = x->data<half_t, cl::Image2D>();
auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"];
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
#endif
int threads = 512;
int group_size_x = (channel + 3) / 4;
......@@ -113,10 +117,13 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
cl::NDRange{static_cast<cl::size_type>(group_size_x * threads),
static_cast<cl::size_type>(group_size_y),
static_cast<cl::size_type>(1)};
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "local_work_size:[2D]:" << local_work_size[0] << " "
<< local_work_size[1] << " " << local_work_size[2];
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......
......@@ -42,11 +42,13 @@ class IoCopyHostToOpenCLCompute
CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kARM));
auto mem_size = param.x->memory_size();
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.x->memory_size():" << mem_size;
VLOG(2) << "param.x->dims().size():" << param.x->dims().size();
VLOG(2) << "param.x->dims():" << param.x->dims();
VLOG(2) << "param.y->dims().size():" << param.y->dims().size();
VLOG(2) << "param.y->dims():" << param.y->dims();
#endif
auto* data = param.y->mutable_data(TARGET(kOpenCL), mem_size);
CopyFromHostSync(data, param.x->raw_data(), mem_size);
}
......@@ -85,12 +87,14 @@ class IoCopykOpenCLToHostCompute
CHECK(param.x->target() == TARGET(kOpenCL));
auto mem_size = param.x->memory_size();
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "copy size " << mem_size;
VLOG(2) << "param.x->dims().size():" << param.x->dims().size();
VLOG(2) << "param.x->dims():" << param.x->dims();
VLOG(2) << "param.y->dims().size():" << param.y->dims().size();
VLOG(2) << "param.y->dims():" << param.y->dims();
VLOG(2) << "param.process_type:" << param.process_type;
#endif
auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
const cl::Buffer* x_ptr;
......@@ -104,7 +108,9 @@ class IoCopykOpenCLToHostCompute
auto* wait_list = context.cl_wait_list();
auto it = wait_list->find(x_ptr);
if (it != wait_list->end()) {
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "--- Find the sync event for the target cl tensor. ---";
#endif
auto& event = *(it->second);
event.wait();
} else {
......
......@@ -74,6 +74,7 @@ class LayoutComputeBufferChwToImageDefault
const int Stride1 = out_H * out_W;
const int Stride0 = out_W;
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.process_type:" << param.process_type;
VLOG(2) << "x_dims:" << x_dims;
VLOG(2) << "param.x->memory_size():" << param.x->memory_size();
......@@ -89,6 +90,7 @@ class LayoutComputeBufferChwToImageDefault
VLOG(2) << "Stride2:" << Stride2;
VLOG(2) << "Stride1:" << Stride1;
VLOG(2) << "Stride0:" << Stride0;
#endif
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
......@@ -177,6 +179,7 @@ class LayoutComputeImageDefaultToBufferChw
new_dims[4 - x_dims.size() + j] = x_dims[j];
}
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "param.process_type:" << param.process_type;
VLOG(2) << "x_dims:" << x_dims;
VLOG(2) << "param.x->memory_size():" << param.x->memory_size();
......@@ -186,6 +189,7 @@ class LayoutComputeImageDefaultToBufferChw
<< new_dims[1] << " " << new_dims[2] << " " << new_dims[3];
VLOG(2) << "y_dims:" << y_dims;
VLOG(2) << "param.y->memory_size():" << param.y->memory_size();
#endif
size_t C = new_dims[1];
size_t in_height = new_dims[2];
......@@ -217,8 +221,10 @@ class LayoutComputeImageDefaultToBufferChw
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(C));
CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(2) << "gws:[3D]" << ((new_dims[1] + 3) / 4) << " " << new_dims[3]
<< " " << (new_dims[0] * new_dims[2]);
#endif
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>((new_dims[1] + 3) / 4),
static_cast<cl::size_type>(new_dims[3]),
......
......@@ -65,6 +65,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
auto out_dims = out->dims();
auto in_dims = x->dims();
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target(): " << TargetToStr(x->target());
VLOG(4) << "out->target(): " << TargetToStr(out->target());
VLOG(4) << "x->dims(): " << in_dims;
......@@ -74,6 +75,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "alpha: " << alpha_;
VLOG(4) << "beta: " << beta_;
VLOG(4) << "norm_region: " << norm_region_;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>();
......@@ -81,9 +83,12 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
// VLOG(4) << "out_image" << out_img;
VLOG(4) << "out_image_shape[w,h]:" << out_image_shape["width"] << " "
<< out_image_shape["height"];
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -97,8 +102,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[3];
#endif
cl_int status = kernel.setArg(arg_idx++, *x_img);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_img);
......@@ -130,9 +137,10 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2];
#endif
}
protected:
......
......@@ -87,6 +87,7 @@ class NearestInterpComputeImageDefault
status = kernel.setArg(++arg_idx, static_cast<const int>(out_dims_w));
CL_CHECK_FATAL(status);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target());
VLOG(4) << "out_image_shape(w,h):" << out_image_shape["width"] << " "
......@@ -95,6 +96,7 @@ class NearestInterpComputeImageDefault
<< x_dims[1] << " " << x_dims[2] << " " << x_dims[3];
VLOG(4) << "y_dims[" << y_dims.size() << "D]:" << y_dims[0] << " "
<< y_dims[1] << " " << y_dims[2] << " " << y_dims[3];
#endif
const std::vector<size_t>& default_work_size =
DefaultWorkSize(y_dims,
......
......@@ -71,10 +71,12 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
int out_h = out_dims[2];
int out_w = out_dims[3];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "x->target():" << TargetToStr(x->target());
VLOG(4) << "out->target():" << TargetToStr(out->target());
VLOG(4) << "x->dims():" << in_dims;
VLOG(4) << "out->dims():" << out_dims;
#endif
auto out_image_shape = InitImageDimInfoWith(out_dims);
auto* x_img = x->data<half_t, cl::Image2D>();
......@@ -82,11 +84,13 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
auto* out_img = out->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape[w,h]: " << out_image_shape["width"] << " "
<< out_image_shape["height"];
VLOG(4) << "in_h: " << in_h << ", in_w: " << in_w;
VLOG(4) << "out_h: " << out_h << ", out_w: " << out_w;
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
......@@ -98,9 +102,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(out_image_shape["width"]),
static_cast<int64_t>(out_image_shape["height"])}));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "default_work_size: " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2];
#endif
int pad_h0 = pad2d_param_->paddings[0];
int pad_h1 = pad2d_param_->paddings[1];
int pad_w0 = pad2d_param_->paddings[2];
......@@ -144,9 +149,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_img, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1] << " " << global_work_size[2];
#endif
}
protected:
......
......@@ -59,10 +59,14 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::vector<int> paddings = *param.paddings;
std::vector<int> strides = param.strides;
std::vector<int> ksize = param.ksize;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_pooling: " << global_pooling;
VLOG(4) << "pooling_type: " << pooling_type;
VLOG(4) << "paddings : " << paddings[0] << " " << paddings[1] << " "
<< paddings[2] << " " << paddings[3] << " ";
#endif
if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[2 * i] = 0;
......@@ -70,6 +74,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
ksize[i] = static_cast<int>(in_dims[i + 2]);
}
}
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "in_dims : [" << in_dims.size() << "]" << in_dims[0] << " "
<< in_dims[1] << " " << in_dims[2] << " " << in_dims[3];
VLOG(4) << "out_dims : [" << out_dims.size() << "]" << out_dims[0] << " "
......@@ -82,6 +88,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
<< ksize[1] << " " << ksize[2] << " " << ksize[3];
VLOG(4) << "paddings : [" << paddings.size() << "]" << paddings[0] << " "
<< paddings[1] << " " << paddings[2] << " " << paddings[3];
#endif
bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
if (!pads_equal) {
......@@ -95,8 +103,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
// VLOG(4) << "x_image" << x_img;
auto out_image_shape = InitImageDimInfoWith(out_dims);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
#endif
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
// VLOG(4) << "out_image" << out_img;
......@@ -109,8 +119,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
int w = out_dims[3];
int nh = out_dims[0] * out_dims[2];
auto global_work_size = cl::NDRange(c_block, w, nh);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size : [" << 3 << "]" << c_block << " " << w
<< " " << nh << " ";
#endif
cl_int status;
int arg_idx = 0;
status = kernel.setArg(arg_idx, *x_img);
......
......@@ -41,8 +41,6 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
}
void Run() override {
VLOG(4) << "reshape_compute run ... ";
auto& param = *param_.get_mutable<param_t>();
const Tensor* const x = param.x;
......@@ -64,8 +62,9 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
InitImageDimInfoWith(out_dims);
cl::Image2D* const out_image = output->mutable_data<half_t, cl::Image2D>(
out_image_shape.at("width"), out_image_shape.at("height"));
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_dims= " << out_dims;
#endif
const std::vector<size_t>& default_work_size = DefaultWorkSize(
out_dims,
DDim(std::vector<DDim::value_type>{
......@@ -94,6 +93,8 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
int out_Stride0 = out_W;
int out_Stride1 = out_H * out_W;
int out_Stride2 = out_C * out_H * out_W;
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_C=" << out_C;
VLOG(4) << "out_H=" << out_H;
VLOG(4) << "out_W=" << out_W;
......@@ -104,17 +105,20 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
VLOG(4) << "in_Stride1=" << in_Stride1;
VLOG(4) << "out_Stride0=" << out_Stride0;
VLOG(4) << "out_Stride1=" << out_Stride1;
#endif
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(x->target());
VLOG(4) << TargetToStr(param.output->target());
#endif
int arg_idx = 0;
cl_int status;
status = kernel.setArg(arg_idx, *x_image);
CL_CHECK_FATAL(status);
......
......@@ -51,8 +51,10 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
// LOG(INFO) << "x_image" << x_img;
auto out_image_shape = InitImageDimInfoWith(in_dims);
LOG(INFO) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
#endif
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
// LOG(INFO) << "out_image" << out_img;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册