Commit 3bbab10d authored by Jiaying Zhao, committed by GitHub

[Lite][OpenCL]fix depthwise_conv3*3 test. test=develop (#3100)

* [Lite][OpenCL]fix depthwise_conv3*3 test. test=develop

* [Lite][OpenCL]fix depthwise_conv3*3 test, format codestyle. test=develop
Parent 628aa0df
@@ -92,9 +92,6 @@ void ConvImageCompute::PrepareForRun() {
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d1x1;
#if 1 // TODO(ysh329): enable general dwconv
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1]) {
#else // TODO(ysh329): remove dwconv3x3s1 and dwconv3x3 temporarily, need fix
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
kernel_h == 3 && kernel_w == 3 && groups > 1) {
// depth_conv2d_3x3s1, depth_conv2d_3x3
@@ -116,7 +113,6 @@ void ConvImageCompute::PrepareForRun() {
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
kernel_h != 3) {
#endif
// depth_conv2d
kernel_func_names_.push_back("depth_conv2d");
kernel_func_paths_.push_back("image/depthwise_conv2d_basic_kernel.cl");
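// The branches above choose an OpenCL kernel from the filter and tensor
// shapes: 1x1, depthwise (filter_dims[1] == 1 with the channel count
// preserved and groups > 1, with a specialized 3x3 variant), or the general
// conv kernel. A minimal sketch of that kind of dispatch follows; the names
// SelectConvKernel and ConvShape are hypothetical and not part of Paddle-Lite.
#include <cstdint>
#include <string>
#include <vector>

struct ConvShape {
  std::vector<int64_t> x_dims;       // NCHW input dims
  std::vector<int64_t> filter_dims;  // [oc, ic / groups, kh, kw]
  std::vector<int64_t> output_dims;  // NCHW output dims
  int groups = 1;
};

inline std::string SelectConvKernel(const ConvShape& s) {
  const int64_t kernel_h = s.filter_dims[2];
  const int64_t kernel_w = s.filter_dims[3];
  if (kernel_h == 1 && kernel_w == 1) {
    return "conv2d_1x1";
  }
  // Depthwise: each filter sees one input channel, channels are preserved.
  if (s.filter_dims[1] == 1 && s.x_dims[1] == s.output_dims[1] &&
      s.groups > 1) {
    return (kernel_h == 3 && kernel_w == 3) ? "depth_conv2d_3x3"
                                            : "depth_conv2d";
  }
  return "conv2d";  // general image kernel
}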
@@ -21,10 +21,17 @@
#include "lite/backends/opencl/target_wrapper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/kernels/opencl/test_helper.h"
namespace paddle {
namespace lite {
#define SHADOW_LOG VLOG(4)
#define FP16_MAX_DIFF (1e0)
#define FP16_ABS_DIFF (1e-1)
// #define TEST_DEPTHWISE_CONV_IMAGE_BASIC
#define TEST_DEPTHWISE_CONV_IMAGE_3X3
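// The comparison later in this test accepts an FP16 result unless it is far
// off in BOTH the relative and the absolute sense: large magnitudes tolerate
// a big absolute error, values near zero tolerate a big relative error.
// COMPUTE_RELATIVE_DIFF / COMPUTE_ABS_DIFF come from test_helper.h; the
// bodies below are a plausible sketch, not that header's actual definitions.
#include <algorithm>
#include <cmath>

inline float RelativeDiffSketch(float a, float b) {
  return std::abs(a - b) / (std::max(std::abs(a), std::abs(b)) + 1e-6f);
}
inline float AbsDiffSketch(float a, float b) { return std::abs(a - b); }

inline bool Fp16MismatchSketch(float got, float expected) {
  return RelativeDiffSketch(got, expected) > FP16_MAX_DIFF &&
         AbsDiffSketch(got, expected) > FP16_ABS_DIFF;
}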
template <typename T, int STRIDE_H = 1, int STRIDE_W = 1>
void depth_conv(const T* input_data,
const lite::DDim& input_dims,
@@ -105,8 +112,9 @@ int ConvOutputSize(int input_size,
return output_size;
}
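// ConvOutputSize follows the standard convolution arithmetic: dilate the
// kernel extent, add the padding on both sides, then stride. The collapsed
// body above may differ in edge handling; this sketch shows the usual form.
inline int ConvOutputSizeSketch(int input_size, int filter_size, int dilation,
                                int pad_left, int pad_right, int stride) {
  const int dkernel = dilation * (filter_size - 1) + 1;  // effective extent
  return (input_size + pad_left + pad_right - dkernel) / stride + 1;
}
// e.g. input 112, 3x3 filter, dilation 1, pad 0, stride 2:
// (112 + 0 + 0 - 3) / 2 + 1 = 55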
#ifdef TEST_DEPTHWISE_CONV_IMAGE_BASIC
// #define LOOP_TEST
TEST(depthwise_conv2d_basic, compute) {
TEST(depthwise_conv2d, compute_basic) {
// conv infos
// const int ksize = 1;
const int stride = 1;
@@ -383,133 +391,193 @@ TEST(depthwise_conv2d_basic, compute) {
// nothing to do.
#endif
}
#endif
TEST(depthwise_conv2d_image2d_fp16, compute) {
LOG(INFO) << "to get kernel ...";
auto kernels = KernelRegistry::Global().Create("depthwise_conv2d",
TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault));
ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel";
lite::Tensor input, filter, output;
operators::ConvParam param;
param.x = &input;
param.filter = &filter;
param.output = &output;
std::vector<int> paddings = {0, 0};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.strides = std::vector<int>{1, 1};
std::vector<int> dilations = {1, 1};
param.dilations = std::make_shared<std::vector<int>>(dilations);
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce();
kernel->SetParam(param);
std::unique_ptr<KernelContext> dep_context(new KernelContext);
context->As<OpenCLContext>().CopySharedTo(
&(dep_context->As<OpenCLContext>()));
kernel->SetContext(std::move(dep_context));
LOG(INFO) << "kernel ready";
std::default_random_engine engine;
std::uniform_real_distribution<float> gen(-5, 5);
std::vector<float> input_v(1 * 32 * 112 * 112);
std::vector<float> filter_v(32 * 1 * 3 * 3);
for (auto& i : input_v) {
i = gen(engine);
}
for (auto& f : filter_v) {
f = gen(engine);
}
#ifdef TEST_DEPTHWISE_CONV_IMAGE_3X3
// #define LOOP_TEST
TEST(depthwise_conv2d, compute_image2d_3x3) {
const int fw = 3;
const int fh = fw;
int dilation = 1;
int stride = 1;
int pad = 0;
#ifdef LOOP_TEST
// for (int batch_size = 1; batch_size < 2; ++batch_size) {
for (int oc = 4; oc < 10; oc += 1) { // oc = ic
for (int ih = 3; ih < 15; ih += 1) { // ih
for (int iw = 3; iw < 15; iw += 1) { // iw
#else
const int oc = 32;
const int ih = 112;
const int iw = 112;
#endif
stride = (stride == 1) ? 2 : 1;
// pad = (pad == 0) ? 1 : 0;
const int fb = oc;
const int ic = oc;
const int oh = ConvOutputSize(ih, fh, dilation, pad, pad, stride);
const int ow = ConvOutputSize(iw, fw, dilation, pad, pad, stride);
LOG(INFO) << "to get kernel ...";
auto kernels =
KernelRegistry::Global().Create("depthwise_conv2d",
TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault));
ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel";
lite::Tensor input, filter, output;
operators::ConvParam param;
param.x = &input;
param.filter = &filter;
param.output = &output;
param.groups = oc;
std::vector<int> paddings = {pad, pad, pad, pad};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.strides = std::vector<int>{stride, stride};
std::vector<int> dilations = {dilation, dilation};
param.dilations = std::make_shared<std::vector<int>>(dilations);
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce();
kernel->SetParam(param);
std::unique_ptr<KernelContext> dep_context(new KernelContext);
context->As<OpenCLContext>().CopySharedTo(
&(dep_context->As<OpenCLContext>()));
kernel->SetContext(std::move(dep_context));
LOG(INFO) << "kernel ready";
const DDim& input_dim =
lite::DDim{std::vector<int64_t>({1, ic, ih, iw})};
const DDim& filter_dim =
lite::DDim{std::vector<int64_t>({fb, 1, 3, 3})};
const DDim& output_dim =
lite::DDim{std::vector<int64_t>({1, oc, oh, ow})};
input.Resize(input_dim);
filter.Resize(filter_dim);
output.Resize(output_dim);
std::default_random_engine engine;
std::uniform_real_distribution<float> gen(-5, 5);
std::vector<float> input_v(input_dim.production());
std::vector<float> filter_v(filter_dim.production());
std::vector<float> output_v(output_dim.production());
for (auto& i : input_v) {
i = gen(engine);
}
for (auto& f : filter_v) {
f = gen(engine);
}
LOG(INFO) << "prepare input";
input.Resize({1, 32, 112, 112});
CLImageConverterDefault* default_converter = new CLImageConverterDefault();
DDim input_image_shape =
default_converter->InitImageDimInfoWith(input.dims());
LOG(INFO) << "input_image_shape = " << input_image_shape[0] << " "
<< input_image_shape[1];
std::vector<half_t> input_image_data(input_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
input_v.data(), input_image_data.data(), input.dims());
auto* input_image = input.mutable_data<half_t, cl::Image2D>(
input_image_shape[0], input_image_shape[1], input_image_data.data());
LOG(INFO) << "prepare kernel";
filter.Resize({32, 1, 3, 3});
CLImageConverterNWBlock* nw_converter = new CLImageConverterNWBlock();
DDim filter_image_shape = nw_converter->InitImageDimInfoWith(filter.dims());
LOG(INFO) << "filter_image_shape = " << filter_image_shape[0] << " "
<< filter_image_shape[1];
std::vector<half_t> filter_image_data(filter_image_shape.production() *
4); // 4 : RGBA
nw_converter->NCHWToImage(
filter_v.data(), filter_image_data.data(), filter.dims());
auto* filter_image = filter.mutable_data<half_t, cl::Image2D>(
filter_image_shape[0], filter_image_shape[1], filter_image_data.data());
LOG(INFO) << "launch";
output.Resize({1, 32, 110, 110});
DDim output_image_shape =
default_converter->InitImageDimInfoWith(output.dims());
LOG(INFO) << "output_image_shape = " << output_image_shape[0] << " "
<< output_image_shape[1];
auto* output_image = output.mutable_data<half_t, cl::Image2D>(
output_image_shape[0], output_image_shape[1]);
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
LOG(INFO) << "--- Find the sync event for the target cl tensor. ---";
auto& event = *(it->second);
event.wait();
} else {
LOG(FATAL) << "Could not find the sync event for the target cl tensor.";
LOG(INFO) << "Could not find the sync event for the target cl tensor.";
}
LOG(INFO) << "prepare input";
CLImageConverterDefault* default_converter =
new CLImageConverterDefault();
DDim input_image_shape =
default_converter->InitImageDimInfoWith(input.dims());
LOG(INFO) << "input_image_shape = " << input_image_shape[0] << " "
<< input_image_shape[1];
std::vector<half_t> input_image_data(input_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
input_v.data(), input_image_data.data(), input.dims());
auto* input_image =
input.mutable_data<half_t, cl::Image2D>(input_image_shape[0],
input_image_shape[1],
input_image_data.data());
LOG(INFO) << "prepare kernel";
filter.Assign<float, lite::DDim, TARGET(kARM)>(filter_v.data(),
filter_dim);
LOG(INFO) << "launch";
DDim output_image_shape =
default_converter->InitImageDimInfoWith(output.dims());
LOG(INFO) << "output_image_shape = " << output_image_shape[0] << " "
<< output_image_shape[1];
auto* output_image = output.mutable_data<half_t, cl::Image2D>(
output_image_shape[0], output_image_shape[1]);
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
LOG(INFO) << "--- Find the sync event for the target cl tensor. ---";
auto& event = *(it->second);
event.wait();
} else {
LOG(FATAL)
<< "Could not find the sync event for the target cl tensor.";
LOG(INFO)
<< "Could not find the sync event for the target cl tensor.";
}
lite::Tensor output_ref;
output_ref.Resize({1, 32, 110, 110});
auto* output_ref_data = output_ref.mutable_data<float>(TARGET(kARM));
depth_conv<float, 1, 1>(input_v.data(),
input.dims(),
filter_v.data(),
filter.dims(),
output_ref_data,
output_ref.dims());
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
half_t* output_image_data = new half_t[output_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(output_image_data,
output_image,
output_image_shape[0],
output_image_shape[1],
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
float* output_data = new float[output_image_shape.production() * 4];
default_converter->ImageToNCHW(
output_image_data, output_data, output_image_shape, output.dims());
LOG(INFO) << "output_data vs output_ref_data";
for (int i = 0; i < output.dims().production(); i++) {
EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4);
LOG(INFO) << output_data[i] << " " << output_ref_data[i];
lite::Tensor out_ref;
out_ref.Resize(output_dim);
auto* out_ref_data = out_ref.mutable_data<float>(TARGET(kARM));
if (stride == 1) {
depth_conv<float, 1, 1>(input_v.data(),
input.dims(),
filter_v.data(),
filter.dims(),
out_ref_data,
out_ref.dims());
} else if (stride == 2) {
depth_conv<float, 2, 2>(input_v.data(),
input.dims(),
filter_v.data(),
filter.dims(),
out_ref_data,
out_ref.dims());
}
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
std::vector<half_t> output_image_data(output_image_shape.production() *
4);
TargetWrapperCL::ImgcpySync(output_image_data.data(),
output_image,
output_image_shape[0],
output_image_shape[1],
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
default_converter->ImageToNCHW(output_image_data.data(),
output_v.data(),
output_image_shape,
output.dims());
LOG(INFO) << "output_data vs output_ref_data";
for (int i = 0; i < output.dims().production(); i++) {
auto relative_diff =
COMPUTE_RELATIVE_DIFF(output_v[i], out_ref_data[i]);
auto abs_diff = COMPUTE_ABS_DIFF(output_v[i], out_ref_data[i]);
EXPECT_FALSE(relative_diff > FP16_MAX_DIFF &&
abs_diff > FP16_ABS_DIFF);
if (relative_diff > FP16_MAX_DIFF && abs_diff > FP16_ABS_DIFF) {
LOG(FATAL) << "error idx:" << i << "output_v[" << i
<< "]:" << output_v[i] << " "
"out_ref_data["
<< i << "]:" << out_ref_data[i];
}
}
#ifdef LOOP_TEST
}
}
}
#else
// nothing to do.
#endif
}
#endif
} // namespace lite
} // namespace paddle
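// The reference values (out_ref_data) in the test above come from the
// depth_conv<float, STRIDE_H, STRIDE_W> helper declared near the top of the
// file. A minimal CPU sketch of such a naive depthwise convolution, assuming
// batch size 1, NCHW layout, zero padding, and no bias (as configured in the
// test), is shown below; it is not the test file's actual implementation.
template <typename T, int STRIDE_H = 1, int STRIDE_W = 1>
void DepthConvSketch(const T* input, int channels, int ih, int iw,
                     const T* filter, int kh, int kw,
                     T* output, int oh, int ow) {
  for (int c = 0; c < channels; ++c) {
    for (int y = 0; y < oh; ++y) {
      for (int x = 0; x < ow; ++x) {
        T acc = 0;
        for (int ky = 0; ky < kh; ++ky) {
          for (int kx = 0; kx < kw; ++kx) {
            const int in_y = y * STRIDE_H + ky;  // pad == 0
            const int in_x = x * STRIDE_W + kx;
            acc += input[(c * ih + in_y) * iw + in_x] *
                   filter[(c * kh + ky) * kw + kx];
          }
        }
        output[(c * oh + y) * ow + x] = acc;
      }
    }
  }
}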