提交 bdbca339 编写于 作者: C chenjiaoAngel

fix format. test=develop

上级 a81f190b
...@@ -28,9 +28,9 @@ namespace paddle { ...@@ -28,9 +28,9 @@ namespace paddle {
namespace lite { namespace lite {
namespace kernels { namespace kernels {
namespace opencl { namespace opencl {
class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL), class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
PRECISION(kFP16), PRECISION(kFP16),
DATALAYOUT(kImageDefault)> { DATALAYOUT(kImageDefault)> {
public: public:
using param_t = operators::BoxCoderParam; using param_t = operators::BoxCoderParam;
...@@ -39,10 +39,10 @@ namespace opencl { ...@@ -39,10 +39,10 @@ namespace opencl {
boxcoder_param_ = param_.get_mutable<param_t>(); boxcoder_param_ = param_.get_mutable<param_t>();
if (boxcoder_param_->code_type == "decode_center_size" && if (boxcoder_param_->code_type == "decode_center_size" &&
boxcoder_param_->box_normalized == true) { boxcoder_param_->box_normalized == true) {
kernel_func_name_ = "decode_center_size"; kernel_func_name_ = "decode_center_size";
} else { } else {
printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str()); printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str());
return; return;
} }
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
...@@ -55,8 +55,9 @@ namespace opencl { ...@@ -55,8 +55,9 @@ namespace opencl {
const auto& out_dims = boxcoder_param_->proposals->dims(); const auto& out_dims = boxcoder_param_->proposals->dims();
auto image_shape = InitImageDimInfoWith(out_dims); auto image_shape = InitImageDimInfoWith(out_dims);
auto* out_buf = boxcoder_param_->proposals->mutable_data<half_t, cl::Image2D>( auto* out_buf =
image_shape["width"], image_shape["height"]); boxcoder_param_->proposals->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "boxcoder input shape: "; VLOG(4) << "boxcoder input shape: ";
...@@ -67,70 +68,70 @@ namespace opencl { ...@@ -67,70 +68,70 @@ namespace opencl {
const auto* input_targetbox = boxcoder_param_->target_box; const auto* input_targetbox = boxcoder_param_->target_box;
const auto& code_type = boxcoder_param_->code_type; const auto& code_type = boxcoder_param_->code_type;
if (code_type == "decode_center_size") { if (code_type == "decode_center_size") {
auto* prior_box_image = input_priorbox->data<half_t, cl::Image2D>(); auto* prior_box_image = input_priorbox->data<half_t, cl::Image2D>();
auto* prior_box_var_image = input_priorboxvar->data<half_t, cl::Image2D>(); auto* prior_box_var_image = input_priorboxvar->data<half_t, cl::Image2D>();
auto* target_box_image = input_targetbox->data<half_t, cl::Image2D>(); auto* target_box_image = input_targetbox->data<half_t, cl::Image2D>();
int new_dims[4] = {1, 1, 1, 1}; int new_dims[4] = {1, 1, 1, 1};
for (int i = 0; i < out_dims.size(); i++) { for (int i = 0; i < out_dims.size(); i++) {
new_dims[4 - out_dims.size() + i] = out_dims[i]; new_dims[4 - out_dims.size() + i] = out_dims[i];
} }
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto default_work_size = DefaultWorkSize(out_dims, auto default_work_size = DefaultWorkSize(out_dims,
DDim(std::vector<DDim::value_type>{ DDim(std::vector<DDim::value_type>{
static_cast<int64_t>(image_shape["width"]), static_cast<int64_t>(image_shape["width"]),
static_cast<int64_t>(image_shape["height"])})); static_cast<int64_t>(image_shape["height"])}));
int out_C = new_dims[1]; int out_C = new_dims[1];
int out_H = new_dims[2]; int out_H = new_dims[2];
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(boxcoder_param_->proposals->target()); VLOG(4) << TargetToStr(boxcoder_param_->proposals->target());
VLOG(4) << "output shape: " << out_dims[0] << ", " << VLOG(4) << "output shape: " << out_dims[0] << ", "
out_dims[1] << ", " << << out_dims[1] << ", "
out_dims[2] << ", " << << out_dims[2] << ", "
out_dims[3]; << out_dims[3];
VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " "
<< image_shape["height"]; << image_shape["height"];
VLOG(4) << "out_C = " << out_C; VLOG(4) << "out_C = " << out_C;
VLOG(4) << "out_H = " << out_H; VLOG(4) << "out_H = " << out_H;
VLOG(4) << "default_work_size = " << default_work_size[0] << ", " VLOG(4) << "default_work_size = " << default_work_size[0] << ", "
<< default_work_size[1] << ", " << default_work_size[2]; << default_work_size[1] << ", " << default_work_size[2];
#endif #endif
int arg_idx = 0; int arg_idx = 0;
cl_int status = kernel.setArg(arg_idx++, *prior_box_image); cl_int status = kernel.setArg(arg_idx++, *prior_box_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *prior_box_var_image); status = kernel.setArg(arg_idx++, *prior_box_var_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *target_box_image); status = kernel.setArg(arg_idx++, *target_box_image);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, *out_buf); status = kernel.setArg(arg_idx++, *out_buf);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_C); status = kernel.setArg(arg_idx++, out_C);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(arg_idx++, out_H); status = kernel.setArg(arg_idx++, out_H);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
auto global_work_size = auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(default_work_size[0]), cl::NDRange{static_cast<cl::size_type>(default_work_size[0]),
static_cast<cl::size_type>(default_work_size[2])}; static_cast<cl::size_type>(default_work_size[2])};
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel, kernel,
cl::NullRange, cl::NullRange,
global_work_size, global_work_size,
cl::NullRange, cl::NullRange,
nullptr, nullptr,
event_.get()); event_.get());
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
context.cl_wait_list()->emplace(out_buf, event_); context.cl_wait_list()->emplace(out_buf, event_);
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " "
<< global_work_size[1]; << global_work_size[1];
#endif #endif
} }
} }
......
...@@ -24,14 +24,14 @@ ...@@ -24,14 +24,14 @@
namespace paddle { namespace paddle {
namespace lite { namespace lite {
void box_coder_ref(float* proposals_data, void box_coder_ref(float* proposals_data,
const float* anchors_data, const float* anchors_data,
const float* bbox_deltas_data, const float* bbox_deltas_data,
const float* variances_data, const float* variances_data,
int axis, int axis,
bool box_normalized, bool box_normalized,
std::string code_type, std::string code_type,
int row, int row,
int col) { int col) {
if (code_type == "decode_center_size") { if (code_type == "decode_center_size") {
int anchor_len = 4; int anchor_len = 4;
int out_len = 4; int out_len = 4;
...@@ -99,178 +99,194 @@ TEST(box_coder_image2d, compute) { ...@@ -99,178 +99,194 @@ TEST(box_coder_image2d, compute) {
const int axis = 0; const int axis = 0;
#endif // BOXCODER_FP16_LOOP_TEST #endif // BOXCODER_FP16_LOOP_TEST
LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m
<< " ========"; << " ========";
LOG(INFO) << "======== parameters: norm = " << norm LOG(INFO) << "======== parameters: norm = " << norm
<< ", axis = " << axis << "code_type: " << code_type; << ", axis = " << axis << "code_type: " << code_type;
auto kernels = KernelRegistry::Global().Create( auto kernels = KernelRegistry::Global().Create(
"box_coder", "box_coder",
TARGET(kOpenCL), TARGET(kOpenCL),
PRECISION(kFP16), PRECISION(kFP16),
DATALAYOUT(kImageDefault)); DATALAYOUT(kImageDefault));
ASSERT_FALSE(kernels.empty()); ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front()); auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel:" << kernel->doc(); LOG(INFO) << "get kernel:" << kernel->doc();
lite::Tensor prior_box, prior_box_var, target_box, output_box; lite::Tensor prior_box, prior_box_var, target_box, output_box;
operators::BoxCoderParam param; operators::BoxCoderParam param;
param.prior_box = &prior_box; param.prior_box = &prior_box;
param.prior_box_var = &prior_box_var; param.prior_box_var = &prior_box_var;
param.target_box = &target_box; param.target_box = &target_box;
param.proposals = &output_box; param.proposals = &output_box;
param.axis = axis; param.axis = axis;
param.box_normalized = norm; param.box_normalized = norm;
param.code_type = code_type; param.code_type = code_type;
std::unique_ptr<KernelContext> context(new KernelContext); std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce(); context->As<OpenCLContext>().InitOnce();
kernel->SetParam(param); kernel->SetParam(param);
std::unique_ptr<KernelContext> boxcoder_context( std::unique_ptr<KernelContext> boxcoder_context(new KernelContext);
new KernelContext); context->As<OpenCLContext>().CopySharedTo(
context->As<OpenCLContext>().CopySharedTo( &(boxcoder_context->As<OpenCLContext>()));
&(boxcoder_context->As<OpenCLContext>())); kernel->SetContext(std::move(boxcoder_context));
kernel->SetContext(std::move(boxcoder_context));
const DDim prior_box_dims = const DDim prior_box_dims =
DDim(std::vector<DDim::value_type>{1, 1, m, 4}); DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim prior_box_var_dims = DDim(std::vector<DDim::value_type>{1, 1, m, 4}); const DDim prior_box_var_dims =
const DDim target_box_dims = DDim(std::vector<DDim::value_type>{1, n, m, 4}); DDim(std::vector<DDim::value_type>{1, 1, m, 4});
const DDim target_box_dims =
DDim(std::vector<DDim::value_type>{1, n, m, 4});
const DDim out_dim = const DDim out_dim =
DDim(std::vector<DDim::value_type>{1, n, m, 4}); DDim(std::vector<DDim::value_type>{1, n, m, 4});
prior_box.Resize(prior_box_dims); prior_box.Resize(prior_box_dims);
prior_box_var.Resize(prior_box_var_dims); prior_box_var.Resize(prior_box_var_dims);
target_box.Resize(target_box_dims); target_box.Resize(target_box_dims);
output_box.Resize(out_dim); output_box.Resize(out_dim);
std::vector<float> prior_box_data(prior_box_dims.production()); std::vector<float> prior_box_data(prior_box_dims.production());
std::vector<float> prior_box_var_data(prior_box_var_dims.production()); std::vector<float> prior_box_var_data(prior_box_var_dims.production());
std::vector<float> target_box_data(target_box_dims.production()); std::vector<float> target_box_data(target_box_dims.production());
for (int i = 0; i < prior_box_dims.production(); i++) { for (int i = 0; i < prior_box_dims.production(); i++) {
prior_box_data[i] = i * 1.1 / prior_box_dims.production(); prior_box_data[i] = i * 1.1 / prior_box_dims.production();
} }
for (int i = 0; i < prior_box_var_dims.production(); i++) { for (int i = 0; i < prior_box_var_dims.production(); i++) {
prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production(); prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production();
} }
for (int i = 0; i < target_box_dims.production(); i++) { for (int i = 0; i < target_box_dims.production(); i++) {
target_box_data[i] = i * 1.3 / target_box_dims.production(); target_box_data[i] = i * 1.3 / target_box_dims.production();
} }
LOG(INFO) << "prepare input"; LOG(INFO) << "prepare input";
CLImageConverterDefault* default_converter = CLImageConverterDefault* default_converter =
new CLImageConverterDefault(); new CLImageConverterDefault();
DDim prior_box_image_shape = DDim prior_box_image_shape =
default_converter->InitImageDimInfoWith(prior_box_dims); default_converter->InitImageDimInfoWith(prior_box_dims);
LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " " LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " "
<< prior_box_image_shape[1]; << prior_box_image_shape[1];
std::vector<half_t> prior_box_image_data(prior_box_image_shape.production() * std::vector<half_t> prior_box_image_data(
4); // 4 : RGBA prior_box_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage( default_converter->NCHWToImage(
prior_box_data.data(), prior_box_image_data.data(), prior_box_dims); prior_box_data.data(),
auto* prior_box_image = prior_box.mutable_data<half_t, cl::Image2D>( prior_box_image_data.data(),
prior_box_image_shape[0], prior_box_image_shape[1], prior_box_image_data.data()); prior_box_dims);
auto* prior_box_image = prior_box.mutable_data<half_t, cl::Image2D>(
prior_box_image_shape[0],
prior_box_image_shape[1],
prior_box_image_data.data());
DDim prior_box_var_image_shape = DDim prior_box_var_image_shape =
default_converter->InitImageDimInfoWith(prior_box_var_dims); default_converter->InitImageDimInfoWith(prior_box_var_dims);
LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " " LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " "
<< prior_box_var_image_shape[1]; << prior_box_var_image_shape[1];
std::vector<half_t> prior_box_var_image_data(prior_box_var_image_shape.production() * std::vector<half_t> prior_box_var_image_data(
4); // 4 : RGBA prior_box_var_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage( default_converter->NCHWToImage(
prior_box_var_data.data(), prior_box_var_image_data.data(), prior_box_var_dims); prior_box_var_data.data(),
auto* prior_box_var_image = prior_box_var.mutable_data<half_t, cl::Image2D>( prior_box_var_image_data.data(),
prior_box_var_image_shape[0], prior_box_var_image_shape[1], prior_box_var_dims);
prior_box_var_image_data.data()); auto* prior_box_var_image = prior_box_var.mutable_data<half_t, cl::Image2D>(
prior_box_var_image_shape[0],
prior_box_var_image_shape[1],
prior_box_var_image_data.data());
DDim target_box_image_shape = DDim target_box_image_shape =
default_converter->InitImageDimInfoWith(target_box_dims); default_converter->InitImageDimInfoWith(target_box_dims);
LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " " LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " "
<< target_box_image_shape[1]; << target_box_image_shape[1];
std::vector<half_t> target_box_image_data(target_box_image_shape.production() * std::vector<half_t> target_box_image_data(
4); // 4 : RGBA target_box_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage( default_converter->NCHWToImage(
target_box_data.data(), target_box_image_data.data(), target_box_dims); target_box_data.data(),
auto* target_box_image = target_box.mutable_data<half_t, cl::Image2D>( target_box_image_data.data(),
target_box_image_shape[0], target_box_image_shape[1], target_box_dims);
target_box_image_data.data()); auto* target_box_image = target_box.mutable_data<half_t, cl::Image2D>(
target_box_image_shape[0],
target_box_image_shape[1],
target_box_image_data.data());
DDim out_image_shape = DDim out_image_shape =
default_converter->InitImageDimInfoWith(out_dim); default_converter->InitImageDimInfoWith(out_dim);
LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " " LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
<< out_image_shape[1]; << out_image_shape[1];
auto* out_image = output_box.mutable_data<half_t, cl::Image2D>( auto* out_image = output_box.mutable_data<half_t, cl::Image2D>(
out_image_shape[0], out_image_shape[1]); out_image_shape[0], out_image_shape[1]);
kernel->Launch(); kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list(); auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.proposals->data<half_t, cl::Image2D>(); auto* out_ptr = param.proposals->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr); auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) { if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl " VLOG(4) << "--- Find the sync event for the target cl "
"tensor. ---"; "tensor. ---";
auto& event = *(it->second); auto& event = *(it->second);
event.wait(); event.wait();
} else { } else {
LOG(FATAL) << "Could not find the sync event for the " LOG(FATAL) << "Could not find the sync event for the "
"target cl tensor."; "target cl tensor.";
} }
lite::Tensor out_ref_tensor; lite::Tensor out_ref_tensor;
out_ref_tensor.Resize(out_dim); out_ref_tensor.Resize(out_dim);
box_coder_ref(out_ref_tensor.mutable_data<float>(), prior_box_data.data(), box_coder_ref(out_ref_tensor.mutable_data<float>(),
target_box_data.data(), prior_box_var_data.data(), prior_box_data.data(),
axis, norm, code_type, target_box_dims[0], target_box_dims[1]); target_box_data.data(),
prior_box_var_data.data(),
axis,
norm,
code_type,
target_box_dims[0],
target_box_dims[1]);
const size_t cl_image2d_row_pitch{0}; const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0}; const size_t cl_image2d_slice_pitch{0};
half_t* out_image_data = half_t* out_image_data =
new half_t[out_image_shape.production() * 4]; new half_t[out_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data, TargetWrapperCL::ImgcpySync(out_image_data,
out_image, out_image,
out_image_shape[0], out_image_shape[0],
out_image_shape[1], out_image_shape[1],
cl_image2d_row_pitch, cl_image2d_row_pitch,
cl_image2d_slice_pitch, cl_image2d_slice_pitch,
IoDirection::DtoH); IoDirection::DtoH);
float* out_data = new float[out_image_shape.production() * 4]; float* out_data = new float[out_image_shape.production() * 4];
default_converter->ImageToNCHW( default_converter->ImageToNCHW(
out_image_data, out_data, out_image_shape, out_dim); out_image_data, out_data, out_image_shape, out_dim);
// result // result
#ifdef BOXCODER_FP16_PRINT_RESULT #ifdef BOXCODER_FP16_PRINT_RESULT
LOG(INFO) LOG(INFO) << "---- print kernel result (input -> output) ----";
<< "---- print kernel result (input -> output) ----"; for (int eidx = 0; eidx < out_dim.production(); ++eidx) {
for (int eidx = 0; eidx < out_dim.production(); ++eidx) { std::cout << target_box_data[eidx] << " -> " << out_data[eidx]
std::cout << target_box_data[eidx] << " -> " << out_data[eidx] << std::endl;
<< std::endl; }
}
#endif // BOXCODER_FP16_PRINT_RESULT #endif // BOXCODER_FP16_PRINT_RESULT
const float* out_ref = out_ref_tensor.data<float>(); const float* out_ref = out_ref_tensor.data<float>();
for (int i = 0; i < out_dim.production(); i++) { for (int i = 0; i < out_dim.production(); i++) {
auto abs_diff = abs(out_data[i] - out_ref[i]); auto abs_diff = abs(out_data[i] - out_ref[i]);
auto relative_diff = auto relative_diff =
COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]); COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]);
EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) || EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) ||
(abs_diff <= FP16_MAX_DIFF), (abs_diff <= FP16_MAX_DIFF),
true); true);
if ((relative_diff > FP16_MAX_DIFF) && if ((relative_diff > FP16_MAX_DIFF) &&
(abs_diff > FP16_MAX_DIFF)) { (abs_diff > FP16_MAX_DIFF)) {
LOG(ERROR) << "error idx:" << i << ", in_data[" << i LOG(ERROR) << "error idx:" << i << ", in_data[" << i
<< "]: " << target_box_data[i] << ", out_data[" << i << "]: " << target_box_data[i] << ", out_data[" << i
<< "]: " << out_data[i] << ", out_ref[" << i << "]: " << out_data[i] << ", out_ref[" << i
<< "]: " << out_ref[i] << "]: " << out_ref[i]
<< ", abs_diff: " << abs_diff << ", abs_diff: " << abs_diff
<< ", relative_diff: " << relative_diff << ", relative_diff: " << relative_diff
<< ", FP16_MAX_DIFF: " << FP16_MAX_DIFF; << ", FP16_MAX_DIFF: " << FP16_MAX_DIFF;
} }
} }
#ifdef BOXCODER_FP16_LOOP_TEST #ifdef BOXCODER_FP16_LOOP_TEST
} // axis } // axis
} // code_type } // code_type
} // norm } // norm
} // m } // m
} // n } // n
#else #else
// nothing to do. // nothing to do.
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册