提交 bdbca339 编写于 作者: C chenjiaoAngel

fix format. test=develop

上级 a81f190b
......@@ -28,9 +28,9 @@ namespace paddle {
namespace lite {
namespace kernels {
namespace opencl {
class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault)> {
class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault)> {
public:
using param_t = operators::BoxCoderParam;
......@@ -39,10 +39,10 @@ namespace opencl {
boxcoder_param_ = param_.get_mutable<param_t>();
if (boxcoder_param_->code_type == "decode_center_size" &&
boxcoder_param_->box_normalized == true) {
kernel_func_name_ = "decode_center_size";
kernel_func_name_ = "decode_center_size";
} else {
printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str());
return;
printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str());
return;
}
CHECK(context.cl_context() != nullptr);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
......@@ -55,8 +55,9 @@ namespace opencl {
const auto& out_dims = boxcoder_param_->proposals->dims();
auto image_shape = InitImageDimInfoWith(out_dims);
auto* out_buf = boxcoder_param_->proposals->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
auto* out_buf =
boxcoder_param_->proposals->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "boxcoder input shape: ";
......@@ -67,70 +68,70 @@ namespace opencl {
const auto* input_targetbox = boxcoder_param_->target_box;
const auto& code_type = boxcoder_param_->code_type;
if (code_type == "decode_center_size") {
auto* prior_box_image = input_priorbox->data<half_t, cl::Image2D>();
auto* prior_box_var_image = input_priorboxvar->data<half_t, cl::Image2D>();
auto* target_box_image = input_targetbox->data<half_t, cl::Image2D>();
auto* prior_box_image = input_priorbox->data<half_t, cl::Image2D>();
auto* prior_box_var_image = input_priorboxvar->data<half_t, cl::Image2D>();
auto* target_box_image = input_targetbox->data<half_t, cl::Image2D>();
int new_dims[4] = {1, 1, 1, 1};
for (int i = 0; i < out_dims.size(); i++) {
new_dims[4 - out_dims.size() + i] = out_dims[i];
}
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int new_dims[4] = {1, 1, 1, 1};
for (int i = 0; i < out_dims.size(); i++) {
new_dims[4 - out_dims.size() + i] = out_dims[i];
}
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto default_work_size = DefaultWorkSize(out_dims,
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(image_shape["width"]),
static_cast<int64_t>(image_shape["height"])}));
auto default_work_size = DefaultWorkSize(out_dims,
DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(image_shape["width"]),
static_cast<int64_t>(image_shape["height"])}));
int out_C = new_dims[1];
int out_H = new_dims[2];
int out_C = new_dims[1];
int out_H = new_dims[2];
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(boxcoder_param_->proposals->target());
VLOG(4) << "output shape: " << out_dims[0] << ", " <<
out_dims[1] << ", " <<
out_dims[2] << ", " <<
out_dims[3];
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"];
VLOG(4) << "out_C = " << out_C;
VLOG(4) << "out_H = " << out_H;
VLOG(4) << "default_work_size = " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2];
VLOG(4) << TargetToStr(boxcoder_param_->proposals->target());
VLOG(4) << "output shape: " << out_dims[0] << ", "
<< out_dims[1] << ", "
<< out_dims[2] << ", "
<< out_dims[3];
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"];
VLOG(4) << "out_C = " << out_C;
VLOG(4) << "out_H = " << out_H;
VLOG(4) << "default_work_size = " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2];
#endif
int arg_idx = 0;
cl_int status = kernel.setArg(arg_idx++, *prior_box_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *prior_box_var_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *target_box_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_buf);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_C);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_H);
CL_CHECK_FATAL(status);
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(default_work_size[0]),
static_cast<cl::size_type>(default_work_size[2])};
int arg_idx = 0;
cl_int status = kernel.setArg(arg_idx++, *prior_box_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *prior_box_var_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *target_box_image);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_buf);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_C);
CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_H);
CL_CHECK_FATAL(status);
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(default_work_size[0]),
static_cast<cl::size_type>(default_work_size[2])};
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
global_work_size,
cl::NullRange,
nullptr,
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_buf, event_);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
global_work_size,
cl::NullRange,
nullptr,
event_.get());
CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_buf, event_);
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1];
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1];
#endif
}
}
......
......@@ -24,14 +24,14 @@
namespace paddle {
namespace lite {
void box_coder_ref(float* proposals_data,
const float* anchors_data,
const float* bbox_deltas_data,
const float* variances_data,
int axis,
bool box_normalized,
std::string code_type,
int row,
int col) {
const float* anchors_data,
const float* bbox_deltas_data,
const float* variances_data,
int axis,
bool box_normalized,
std::string code_type,
int row,
int col) {
if (code_type == "decode_center_size") {
int anchor_len = 4;
int out_len = 4;
......@@ -99,178 +99,194 @@ TEST(box_coder_image2d, compute) {
const int axis = 0;
#endif // BOXCODER_FP16_LOOP_TEST
LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m
<< " ========";
LOG(INFO) << "======== parameters: norm = " << norm
<< ", axis = " << axis << "code_type: " << code_type;
LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m
<< " ========";
LOG(INFO) << "======== parameters: norm = " << norm
<< ", axis = " << axis << "code_type: " << code_type;
auto kernels = KernelRegistry::Global().Create(
"box_coder",
TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault));
ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel:" << kernel->doc();
auto kernels = KernelRegistry::Global().Create(
"box_coder",
TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault));
ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel:" << kernel->doc();
lite::Tensor prior_box, prior_box_var, target_box, output_box;
operators::BoxCoderParam param;
param.prior_box = &prior_box;
param.prior_box_var = &prior_box_var;
param.target_box = &target_box;
param.proposals = &output_box;
param.axis = axis;
param.box_normalized = norm;
param.code_type = code_type;
lite::Tensor prior_box, prior_box_var, target_box, output_box;
operators::BoxCoderParam param;
param.prior_box = &prior_box;
param.prior_box_var = &prior_box_var;
param.target_box = &target_box;
param.proposals = &output_box;
param.axis = axis;
param.box_normalized = norm;
param.code_type = code_type;
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce();
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce();
kernel->SetParam(param);
std::unique_ptr<KernelContext> boxcoder_context(
new KernelContext);
context->As<OpenCLContext>().CopySharedTo(
&(boxcoder_context->As<OpenCLContext>()));
kernel->SetContext(std::move(boxcoder_context));
kernel->SetParam(param);
std::unique_ptr<KernelContext> boxcoder_context(new KernelContext);
context->As<OpenCLContext>().CopySharedTo(
&(boxcoder_context->As<OpenCLContext>()));
kernel->SetContext(std::move(boxcoder_context));
const DDim prior_box_dims =
DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim prior_box_var_dims = DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim target_box_dims = DDim(std::vector<DDim::value_type>{1, n, m, 4});
const DDim prior_box_dims =
DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim prior_box_var_dims =
DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim target_box_dims =
DDim(std::vector<DDim::value_type>{1, n, m, 4});
const DDim out_dim =
DDim(std::vector<DDim::value_type>{1, n, m, 4});
prior_box.Resize(prior_box_dims);
prior_box_var.Resize(prior_box_var_dims);
target_box.Resize(target_box_dims);
output_box.Resize(out_dim);
const DDim out_dim =
DDim(std::vector<DDim::value_type>{1, n, m, 4});
prior_box.Resize(prior_box_dims);
prior_box_var.Resize(prior_box_var_dims);
target_box.Resize(target_box_dims);
output_box.Resize(out_dim);
std::vector<float> prior_box_data(prior_box_dims.production());
std::vector<float> prior_box_var_data(prior_box_var_dims.production());
std::vector<float> target_box_data(target_box_dims.production());
for (int i = 0; i < prior_box_dims.production(); i++) {
prior_box_data[i] = i * 1.1 / prior_box_dims.production();
}
for (int i = 0; i < prior_box_var_dims.production(); i++) {
prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production();
}
for (int i = 0; i < target_box_dims.production(); i++) {
target_box_data[i] = i * 1.3 / target_box_dims.production();
}
std::vector<float> prior_box_data(prior_box_dims.production());
std::vector<float> prior_box_var_data(prior_box_var_dims.production());
std::vector<float> target_box_data(target_box_dims.production());
for (int i = 0; i < prior_box_dims.production(); i++) {
prior_box_data[i] = i * 1.1 / prior_box_dims.production();
}
for (int i = 0; i < prior_box_var_dims.production(); i++) {
prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production();
}
for (int i = 0; i < target_box_dims.production(); i++) {
target_box_data[i] = i * 1.3 / target_box_dims.production();
}
LOG(INFO) << "prepare input";
CLImageConverterDefault* default_converter =
new CLImageConverterDefault();
DDim prior_box_image_shape =
default_converter->InitImageDimInfoWith(prior_box_dims);
LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " "
<< prior_box_image_shape[1];
std::vector<half_t> prior_box_image_data(prior_box_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
prior_box_data.data(), prior_box_image_data.data(), prior_box_dims);
auto* prior_box_image = prior_box.mutable_data<half_t, cl::Image2D>(
prior_box_image_shape[0], prior_box_image_shape[1], prior_box_image_data.data());
LOG(INFO) << "prepare input";
CLImageConverterDefault* default_converter =
new CLImageConverterDefault();
DDim prior_box_image_shape =
default_converter->InitImageDimInfoWith(prior_box_dims);
LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " "
<< prior_box_image_shape[1];
std::vector<half_t> prior_box_image_data(
prior_box_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage(
prior_box_data.data(),
prior_box_image_data.data(),
prior_box_dims);
auto* prior_box_image = prior_box.mutable_data<half_t, cl::Image2D>(
prior_box_image_shape[0],
prior_box_image_shape[1],
prior_box_image_data.data());
DDim prior_box_var_image_shape =
default_converter->InitImageDimInfoWith(prior_box_var_dims);
LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " "
<< prior_box_var_image_shape[1];
std::vector<half_t> prior_box_var_image_data(prior_box_var_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
prior_box_var_data.data(), prior_box_var_image_data.data(), prior_box_var_dims);
auto* prior_box_var_image = prior_box_var.mutable_data<half_t, cl::Image2D>(
prior_box_var_image_shape[0], prior_box_var_image_shape[1],
prior_box_var_image_data.data());
DDim prior_box_var_image_shape =
default_converter->InitImageDimInfoWith(prior_box_var_dims);
LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " "
<< prior_box_var_image_shape[1];
std::vector<half_t> prior_box_var_image_data(
prior_box_var_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage(
prior_box_var_data.data(),
prior_box_var_image_data.data(),
prior_box_var_dims);
auto* prior_box_var_image = prior_box_var.mutable_data<half_t, cl::Image2D>(
prior_box_var_image_shape[0],
prior_box_var_image_shape[1],
prior_box_var_image_data.data());
DDim target_box_image_shape =
default_converter->InitImageDimInfoWith(target_box_dims);
LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " "
<< target_box_image_shape[1];
std::vector<half_t> target_box_image_data(target_box_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
target_box_data.data(), target_box_image_data.data(), target_box_dims);
auto* target_box_image = target_box.mutable_data<half_t, cl::Image2D>(
target_box_image_shape[0], target_box_image_shape[1],
target_box_image_data.data());
DDim target_box_image_shape =
default_converter->InitImageDimInfoWith(target_box_dims);
LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " "
<< target_box_image_shape[1];
std::vector<half_t> target_box_image_data(
target_box_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage(
target_box_data.data(),
target_box_image_data.data(),
target_box_dims);
auto* target_box_image = target_box.mutable_data<half_t, cl::Image2D>(
target_box_image_shape[0],
target_box_image_shape[1],
target_box_image_data.data());
DDim out_image_shape =
default_converter->InitImageDimInfoWith(out_dim);
LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
<< out_image_shape[1];
auto* out_image = output_box.mutable_data<half_t, cl::Image2D>(
out_image_shape[0], out_image_shape[1]);
kernel->Launch();
DDim out_image_shape =
default_converter->InitImageDimInfoWith(out_dim);
LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
<< out_image_shape[1];
auto* out_image = output_box.mutable_data<half_t, cl::Image2D>(
out_image_shape[0], out_image_shape[1]);
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.proposals->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl "
"tensor. ---";
auto& event = *(it->second);
event.wait();
} else {
LOG(FATAL) << "Could not find the sync event for the "
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.proposals->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl "
"tensor. ---";
auto& event = *(it->second);
event.wait();
} else {
LOG(FATAL) << "Could not find the sync event for the "
"target cl tensor.";
}
}
lite::Tensor out_ref_tensor;
out_ref_tensor.Resize(out_dim);
box_coder_ref(out_ref_tensor.mutable_data<float>(), prior_box_data.data(),
target_box_data.data(), prior_box_var_data.data(),
axis, norm, code_type, target_box_dims[0], target_box_dims[1]);
lite::Tensor out_ref_tensor;
out_ref_tensor.Resize(out_dim);
box_coder_ref(out_ref_tensor.mutable_data<float>(),
prior_box_data.data(),
target_box_data.data(),
prior_box_var_data.data(),
axis,
norm,
code_type,
target_box_dims[0],
target_box_dims[1]);
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
half_t* out_image_data =
new half_t[out_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data,
out_image,
out_image_shape[0],
out_image_shape[1],
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
float* out_data = new float[out_image_shape.production() * 4];
default_converter->ImageToNCHW(
out_image_data, out_data, out_image_shape, out_dim);
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
half_t* out_image_data =
new half_t[out_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data,
out_image,
out_image_shape[0],
out_image_shape[1],
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
float* out_data = new float[out_image_shape.production() * 4];
default_converter->ImageToNCHW(
out_image_data, out_data, out_image_shape, out_dim);
// Print the kernel result (only when BOXCODER_FP16_PRINT_RESULT is defined)
#ifdef BOXCODER_FP16_PRINT_RESULT
LOG(INFO)
<< "---- print kernel result (input -> output) ----";
for (int eidx = 0; eidx < out_dim.production(); ++eidx) {
std::cout << target_box_data[eidx] << " -> " << out_data[eidx]
<< std::endl;
}
LOG(INFO) << "---- print kernel result (input -> output) ----";
for (int eidx = 0; eidx < out_dim.production(); ++eidx) {
std::cout << target_box_data[eidx] << " -> " << out_data[eidx]
<< std::endl;
}
#endif // BOXCODER_FP16_PRINT_RESULT
const float* out_ref = out_ref_tensor.data<float>();
for (int i = 0; i < out_dim.production(); i++) {
auto abs_diff = abs(out_data[i] - out_ref[i]);
auto relative_diff =
COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]);
EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) ||
(abs_diff <= FP16_MAX_DIFF),
true);
if ((relative_diff > FP16_MAX_DIFF) &&
(abs_diff > FP16_MAX_DIFF)) {
LOG(ERROR) << "error idx:" << i << ", in_data[" << i
<< "]: " << target_box_data[i] << ", out_data[" << i
<< "]: " << out_data[i] << ", out_ref[" << i
<< "]: " << out_ref[i]
<< ", abs_diff: " << abs_diff
<< ", relative_diff: " << relative_diff
<< ", FP16_MAX_DIFF: " << FP16_MAX_DIFF;
}
}
const float* out_ref = out_ref_tensor.data<float>();
for (int i = 0; i < out_dim.production(); i++) {
auto abs_diff = abs(out_data[i] - out_ref[i]);
auto relative_diff =
COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]);
EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) ||
(abs_diff <= FP16_MAX_DIFF),
true);
if ((relative_diff > FP16_MAX_DIFF) &&
(abs_diff > FP16_MAX_DIFF)) {
LOG(ERROR) << "error idx:" << i << ", in_data[" << i
<< "]: " << target_box_data[i] << ", out_data[" << i
<< "]: " << out_data[i] << ", out_ref[" << i
<< "]: " << out_ref[i]
<< ", abs_diff: " << abs_diff
<< ", relative_diff: " << relative_diff
<< ", FP16_MAX_DIFF: " << FP16_MAX_DIFF;
}
}
#ifdef BOXCODER_FP16_LOOP_TEST
} // axis
} // code_type
} // norm
} // m
} // n
} // axis
} // code_type
} // norm
} // m
} // n
#else
// nothing to do.
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册