diff --git a/lite/kernels/opencl/box_coder_image_compute.cc b/lite/kernels/opencl/box_coder_image_compute.cc index 295317c34498adc5f152277aae898b95e6abe89d..8135a47a4a6258b0a6af0d16c97366fe47e6d463 100644 --- a/lite/kernels/opencl/box_coder_image_compute.cc +++ b/lite/kernels/opencl/box_coder_image_compute.cc @@ -28,9 +28,9 @@ namespace paddle { namespace lite { namespace kernels { namespace opencl { - class BoxCoderComputeImage : public KernelLite { +class BoxCoderComputeImage : public KernelLite { public: using param_t = operators::BoxCoderParam; @@ -39,10 +39,10 @@ namespace opencl { boxcoder_param_ = param_.get_mutable(); if (boxcoder_param_->code_type == "decode_center_size" && boxcoder_param_->box_normalized == true) { - kernel_func_name_ = "decode_center_size"; + kernel_func_name_ = "decode_center_size"; } else { - printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str()); - return; + printf("This code_type %s doesn't support \n", boxcoder_param_->code_type.c_str()); + return; } CHECK(context.cl_context() != nullptr); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; @@ -55,8 +55,9 @@ namespace opencl { const auto& out_dims = boxcoder_param_->proposals->dims(); auto image_shape = InitImageDimInfoWith(out_dims); - auto* out_buf = boxcoder_param_->proposals->mutable_data( - image_shape["width"], image_shape["height"]); + auto* out_buf = + boxcoder_param_->proposals->mutable_data( + image_shape["width"], image_shape["height"]); #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "boxcoder input shape: "; @@ -67,70 +68,70 @@ namespace opencl { const auto* input_targetbox = boxcoder_param_->target_box; const auto& code_type = boxcoder_param_->code_type; if (code_type == "decode_center_size") { - auto* prior_box_image = input_priorbox->data(); - auto* prior_box_var_image = input_priorboxvar->data(); - auto* target_box_image = input_targetbox->data(); + auto* prior_box_image = input_priorbox->data(); + auto* prior_box_var_image = input_priorboxvar->data(); + auto* target_box_image = input_targetbox->data(); - int new_dims[4] = {1, 1, 1, 1}; - for (int i = 0; i < out_dims.size(); i++) { - new_dims[4 - out_dims.size() + i] = out_dims[i]; - } - auto& context = ctx_->As(); - CHECK(context.cl_context() != nullptr); - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; - auto kernel = context.cl_context()->GetKernel(kernel_key.str()); + int new_dims[4] = {1, 1, 1, 1}; + for (int i = 0; i < out_dims.size(); i++) { + new_dims[4 - out_dims.size() + i] = out_dims[i]; + } + auto& context = ctx_->As(); + CHECK(context.cl_context() != nullptr); + STL::stringstream kernel_key; + kernel_key << kernel_func_name_ << build_options_; + auto kernel = context.cl_context()->GetKernel(kernel_key.str()); - auto default_work_size = DefaultWorkSize(out_dims, - DDim(std::vector{ - static_cast(image_shape["width"]), - static_cast(image_shape["height"])})); + auto default_work_size = DefaultWorkSize(out_dims, + DDim(std::vector{ + static_cast(image_shape["width"]), + static_cast(image_shape["height"])})); - int out_C = new_dims[1]; - int out_H = new_dims[2]; + int out_C = new_dims[1]; + int out_H = new_dims[2]; #ifndef LITE_SHUTDOWN_LOG - VLOG(4) << TargetToStr(boxcoder_param_->proposals->target()); - VLOG(4) << "output shape: " << out_dims[0] << ", " << - out_dims[1] << ", " << - out_dims[2] << ", " << - out_dims[3]; - VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " - << image_shape["height"]; - VLOG(4) << "out_C = " << out_C; - VLOG(4) << "out_H = " << out_H; - VLOG(4) << "default_work_size = " << default_work_size[0] << ", " - << default_work_size[1] << ", " << default_work_size[2]; + VLOG(4) << TargetToStr(boxcoder_param_->proposals->target()); + VLOG(4) << "output shape: " << out_dims[0] << ", " + << out_dims[1] << ", " + << out_dims[2] << ", " + << out_dims[3]; + VLOG(4) << "image_shape(w,h):" << image_shape["width"] << " " + << image_shape["height"]; + VLOG(4) << "out_C = " << out_C; + VLOG(4) << "out_H = " << out_H; + VLOG(4) << "default_work_size = " << default_work_size[0] << ", " + << default_work_size[1] << ", " << default_work_size[2]; #endif - int arg_idx = 0; - cl_int status = kernel.setArg(arg_idx++, *prior_box_image); - CL_CHECK_FATAL(status); - status = kernel.setArg(arg_idx++, *prior_box_var_image); - CL_CHECK_FATAL(status); - status = kernel.setArg(arg_idx++, *target_box_image); - CL_CHECK_FATAL(status); - status = kernel.setArg(arg_idx++, *out_buf); - CL_CHECK_FATAL(status); - status = kernel.setArg(arg_idx++, out_C); - CL_CHECK_FATAL(status); - status = kernel.setArg(arg_idx++, out_H); - CL_CHECK_FATAL(status); - auto global_work_size = - cl::NDRange{static_cast(default_work_size[0]), - static_cast(default_work_size[2])}; + int arg_idx = 0; + cl_int status = kernel.setArg(arg_idx++, *prior_box_image); + CL_CHECK_FATAL(status); + status = kernel.setArg(arg_idx++, *prior_box_var_image); + CL_CHECK_FATAL(status); + status = kernel.setArg(arg_idx++, *target_box_image); + CL_CHECK_FATAL(status); + status = kernel.setArg(arg_idx++, *out_buf); + CL_CHECK_FATAL(status); + status = kernel.setArg(arg_idx++, out_C); + CL_CHECK_FATAL(status); + status = kernel.setArg(arg_idx++, out_H); + CL_CHECK_FATAL(status); + auto global_work_size = + cl::NDRange{static_cast(default_work_size[0]), + static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - event_.get()); - CL_CHECK_FATAL(status); - context.cl_wait_list()->emplace(out_buf, event_); + status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_.get()); + CL_CHECK_FATAL(status); + context.cl_wait_list()->emplace(out_buf, event_); #ifndef LITE_SHUTDOWN_LOG - VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " - << global_work_size[1]; + VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " + << global_work_size[1]; #endif } } diff --git a/lite/kernels/opencl/box_coder_image_compute_test.cc b/lite/kernels/opencl/box_coder_image_compute_test.cc index a50115967863efdff83b99d3d25144a8768707ca..25e4c9790f47ce5e2bec60670a1820b32d9adcf5 100644 --- a/lite/kernels/opencl/box_coder_image_compute_test.cc +++ b/lite/kernels/opencl/box_coder_image_compute_test.cc @@ -24,14 +24,14 @@ namespace paddle { namespace lite { void box_coder_ref(float* proposals_data, - const float* anchors_data, - const float* bbox_deltas_data, - const float* variances_data, - int axis, - bool box_normalized, - std::string code_type, - int row, - int col) { + const float* anchors_data, + const float* bbox_deltas_data, + const float* variances_data, + int axis, + bool box_normalized, + std::string code_type, + int row, + int col) { if (code_type == "decode_center_size") { int anchor_len = 4; int out_len = 4; @@ -99,178 +99,194 @@ TEST(box_coder_image2d, compute) { const int axis = 0; #endif // BOXCODER_FP16_LOOP_TEST - LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m - << " ========"; - LOG(INFO) << "======== parameters: norm = " << norm - << ", axis = " << axis << "code_type: " << code_type; + LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m + << " ========"; + LOG(INFO) << "======== parameters: norm = " << norm + << ", axis = " << axis << "code_type: " << code_type; - auto kernels = KernelRegistry::Global().Create( - "box_coder", - TARGET(kOpenCL), - PRECISION(kFP16), - DATALAYOUT(kImageDefault)); - ASSERT_FALSE(kernels.empty()); - auto kernel = std::move(kernels.front()); - LOG(INFO) << "get kernel:" << kernel->doc(); + auto kernels = KernelRegistry::Global().Create( + "box_coder", + TARGET(kOpenCL), + PRECISION(kFP16), + DATALAYOUT(kImageDefault)); + ASSERT_FALSE(kernels.empty()); + auto kernel = std::move(kernels.front()); + LOG(INFO) << "get kernel:" << kernel->doc(); - lite::Tensor prior_box, prior_box_var, target_box, output_box; - operators::BoxCoderParam param; - param.prior_box = &prior_box; - param.prior_box_var = &prior_box_var; - param.target_box = &target_box; - param.proposals = &output_box; - param.axis = axis; - param.box_normalized = norm; - param.code_type = code_type; + lite::Tensor prior_box, prior_box_var, target_box, output_box; + operators::BoxCoderParam param; + param.prior_box = &prior_box; + param.prior_box_var = &prior_box_var; + param.target_box = &target_box; + param.proposals = &output_box; + param.axis = axis; + param.box_normalized = norm; + param.code_type = code_type; - std::unique_ptr context(new KernelContext); - context->As().InitOnce(); + std::unique_ptr context(new KernelContext); + context->As().InitOnce(); - kernel->SetParam(param); - std::unique_ptr boxcoder_context( - new KernelContext); - context->As().CopySharedTo( - &(boxcoder_context->As())); - kernel->SetContext(std::move(boxcoder_context)); + kernel->SetParam(param); + std::unique_ptr boxcoder_context(new KernelContext); + context->As().CopySharedTo( + &(boxcoder_context->As())); + kernel->SetContext(std::move(boxcoder_context)); - const DDim prior_box_dims = - DDim(std::vector{1, 1, m, 4}); - const DDim prior_box_var_dims = DDim(std::vector{1, 1, m, 4}); - const DDim target_box_dims = DDim(std::vector{1, n, m, 4}); + const DDim prior_box_dims = + DDim(std::vector{1, 1, m, 4}); + const DDim prior_box_var_dims = + DDim(std::vector{1, 1, m, 4}); + const DDim target_box_dims = + DDim(std::vector{1, n, m, 4}); - const DDim out_dim = - DDim(std::vector{1, n, m, 4}); - prior_box.Resize(prior_box_dims); - prior_box_var.Resize(prior_box_var_dims); - target_box.Resize(target_box_dims); - output_box.Resize(out_dim); + const DDim out_dim = + DDim(std::vector{1, n, m, 4}); + prior_box.Resize(prior_box_dims); + prior_box_var.Resize(prior_box_var_dims); + target_box.Resize(target_box_dims); + output_box.Resize(out_dim); - std::vector prior_box_data(prior_box_dims.production()); - std::vector prior_box_var_data(prior_box_var_dims.production()); - std::vector target_box_data(target_box_dims.production()); - for (int i = 0; i < prior_box_dims.production(); i++) { - prior_box_data[i] = i * 1.1 / prior_box_dims.production(); - } - for (int i = 0; i < prior_box_var_dims.production(); i++) { - prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production(); - } - for (int i = 0; i < target_box_dims.production(); i++) { - target_box_data[i] = i * 1.3 / target_box_dims.production(); - } + std::vector prior_box_data(prior_box_dims.production()); + std::vector prior_box_var_data(prior_box_var_dims.production()); + std::vector target_box_data(target_box_dims.production()); + for (int i = 0; i < prior_box_dims.production(); i++) { + prior_box_data[i] = i * 1.1 / prior_box_dims.production(); + } + for (int i = 0; i < prior_box_var_dims.production(); i++) { + prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production(); + } + for (int i = 0; i < target_box_dims.production(); i++) { + target_box_data[i] = i * 1.3 / target_box_dims.production(); + } - LOG(INFO) << "prepare input"; - CLImageConverterDefault* default_converter = - new CLImageConverterDefault(); - DDim prior_box_image_shape = - default_converter->InitImageDimInfoWith(prior_box_dims); - LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " " - << prior_box_image_shape[1]; - std::vector prior_box_image_data(prior_box_image_shape.production() * - 4); // 4 : RGBA - default_converter->NCHWToImage( - prior_box_data.data(), prior_box_image_data.data(), prior_box_dims); - auto* prior_box_image = prior_box.mutable_data( - prior_box_image_shape[0], prior_box_image_shape[1], prior_box_image_data.data()); + LOG(INFO) << "prepare input"; + CLImageConverterDefault* default_converter = + new CLImageConverterDefault(); + DDim prior_box_image_shape = + default_converter->InitImageDimInfoWith(prior_box_dims); + LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0] << " " + << prior_box_image_shape[1]; + std::vector prior_box_image_data( + prior_box_image_shape.production() * 4); // 4 : RGBA + default_converter->NCHWToImage( + prior_box_data.data(), + prior_box_image_data.data(), + prior_box_dims); + auto* prior_box_image = prior_box.mutable_data( + prior_box_image_shape[0], + prior_box_image_shape[1], + prior_box_image_data.data()); - DDim prior_box_var_image_shape = - default_converter->InitImageDimInfoWith(prior_box_var_dims); - LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " " - << prior_box_var_image_shape[1]; - std::vector prior_box_var_image_data(prior_box_var_image_shape.production() * - 4); // 4 : RGBA - default_converter->NCHWToImage( - prior_box_var_data.data(), prior_box_var_image_data.data(), prior_box_var_dims); - auto* prior_box_var_image = prior_box_var.mutable_data( - prior_box_var_image_shape[0], prior_box_var_image_shape[1], - prior_box_var_image_data.data()); + DDim prior_box_var_image_shape = + default_converter->InitImageDimInfoWith(prior_box_var_dims); + LOG(INFO) << "prior_box_var_image_shape = " << prior_box_var_image_shape[0] << " " + << prior_box_var_image_shape[1]; + std::vector prior_box_var_image_data( + prior_box_var_image_shape.production() * 4); // 4 : RGBA + default_converter->NCHWToImage( + prior_box_var_data.data(), + prior_box_var_image_data.data(), + prior_box_var_dims); + auto* prior_box_var_image = prior_box_var.mutable_data( + prior_box_var_image_shape[0], + prior_box_var_image_shape[1], + prior_box_var_image_data.data()); - DDim target_box_image_shape = - default_converter->InitImageDimInfoWith(target_box_dims); - LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " " - << target_box_image_shape[1]; - std::vector target_box_image_data(target_box_image_shape.production() * - 4); // 4 : RGBA - default_converter->NCHWToImage( - target_box_data.data(), target_box_image_data.data(), target_box_dims); - auto* target_box_image = target_box.mutable_data( - target_box_image_shape[0], target_box_image_shape[1], - target_box_image_data.data()); + DDim target_box_image_shape = + default_converter->InitImageDimInfoWith(target_box_dims); + LOG(INFO) << "target_box_image_shape = " << target_box_image_shape[0] << " " + << target_box_image_shape[1]; + std::vector target_box_image_data( + target_box_image_shape.production() * 4); // 4 : RGBA + default_converter->NCHWToImage( + target_box_data.data(), + target_box_image_data.data(), + target_box_dims); + auto* target_box_image = target_box.mutable_data( + target_box_image_shape[0], + target_box_image_shape[1], + target_box_image_data.data()); - DDim out_image_shape = - default_converter->InitImageDimInfoWith(out_dim); - LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " " - << out_image_shape[1]; - auto* out_image = output_box.mutable_data( - out_image_shape[0], out_image_shape[1]); - kernel->Launch(); + DDim out_image_shape = + default_converter->InitImageDimInfoWith(out_dim); + LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " " + << out_image_shape[1]; + auto* out_image = output_box.mutable_data( + out_image_shape[0], out_image_shape[1]); + kernel->Launch(); - auto* wait_list = context->As().cl_wait_list(); - auto* out_ptr = param.proposals->data(); - auto it = wait_list->find(out_ptr); - if (it != wait_list->end()) { - VLOG(4) << "--- Find the sync event for the target cl " - "tensor. ---"; - auto& event = *(it->second); - event.wait(); - } else { - LOG(FATAL) << "Could not find the sync event for the " + auto* wait_list = context->As().cl_wait_list(); + auto* out_ptr = param.proposals->data(); + auto it = wait_list->find(out_ptr); + if (it != wait_list->end()) { + VLOG(4) << "--- Find the sync event for the target cl " + "tensor. ---"; + auto& event = *(it->second); + event.wait(); + } else { + LOG(FATAL) << "Could not find the sync event for the " "target cl tensor."; - } + } - lite::Tensor out_ref_tensor; - out_ref_tensor.Resize(out_dim); - box_coder_ref(out_ref_tensor.mutable_data(), prior_box_data.data(), - target_box_data.data(), prior_box_var_data.data(), - axis, norm, code_type, target_box_dims[0], target_box_dims[1]); + lite::Tensor out_ref_tensor; + out_ref_tensor.Resize(out_dim); + box_coder_ref(out_ref_tensor.mutable_data(), + prior_box_data.data(), + target_box_data.data(), + prior_box_var_data.data(), + axis, + norm, + code_type, + target_box_dims[0], + target_box_dims[1]); - const size_t cl_image2d_row_pitch{0}; - const size_t cl_image2d_slice_pitch{0}; - half_t* out_image_data = - new half_t[out_image_shape.production() * 4]; - TargetWrapperCL::ImgcpySync(out_image_data, - out_image, - out_image_shape[0], - out_image_shape[1], - cl_image2d_row_pitch, - cl_image2d_slice_pitch, - IoDirection::DtoH); - float* out_data = new float[out_image_shape.production() * 4]; - default_converter->ImageToNCHW( - out_image_data, out_data, out_image_shape, out_dim); + const size_t cl_image2d_row_pitch{0}; + const size_t cl_image2d_slice_pitch{0}; + half_t* out_image_data = + new half_t[out_image_shape.production() * 4]; + TargetWrapperCL::ImgcpySync(out_image_data, + out_image, + out_image_shape[0], + out_image_shape[1], + cl_image2d_row_pitch, + cl_image2d_slice_pitch, + IoDirection::DtoH); + float* out_data = new float[out_image_shape.production() * 4]; + default_converter->ImageToNCHW( + out_image_data, out_data, out_image_shape, out_dim); // result #ifdef BOXCODER_FP16_PRINT_RESULT - LOG(INFO) - << "---- print kernel result (input -> output) ----"; - for (int eidx = 0; eidx < out_dim.production(); ++eidx) { - std::cout << target_box_data[eidx] << " -> " << out_data[eidx] - << std::endl; - } + LOG(INFO) << "---- print kernel result (input -> output) ----"; + for (int eidx = 0; eidx < out_dim.production(); ++eidx) { + std::cout << target_box_data[eidx] << " -> " << out_data[eidx] + << std::endl; + } #endif // BOXCODER_FP16_PRINT_RESULT - const float* out_ref = out_ref_tensor.data(); - for (int i = 0; i < out_dim.production(); i++) { - auto abs_diff = abs(out_data[i] - out_ref[i]); - auto relative_diff = - COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]); - EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) || - (abs_diff <= FP16_MAX_DIFF), - true); - if ((relative_diff > FP16_MAX_DIFF) && - (abs_diff > FP16_MAX_DIFF)) { - LOG(ERROR) << "error idx:" << i << ", in_data[" << i - << "]: " << target_box_data[i] << ", out_data[" << i - << "]: " << out_data[i] << ", out_ref[" << i - << "]: " << out_ref[i] - << ", abs_diff: " << abs_diff - << ", relative_diff: " << relative_diff - << ", FP16_MAX_DIFF: " << FP16_MAX_DIFF; - } - } + const float* out_ref = out_ref_tensor.data(); + for (int i = 0; i < out_dim.production(); i++) { + auto abs_diff = abs(out_data[i] - out_ref[i]); + auto relative_diff = + COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]); + EXPECT_EQ((relative_diff <= FP16_MAX_DIFF) || + (abs_diff <= FP16_MAX_DIFF), + true); + if ((relative_diff > FP16_MAX_DIFF) && + (abs_diff > FP16_MAX_DIFF)) { + LOG(ERROR) << "error idx:" << i << ", in_data[" << i + << "]: " << target_box_data[i] << ", out_data[" << i + << "]: " << out_data[i] << ", out_ref[" << i + << "]: " << out_ref[i] + << ", abs_diff: " << abs_diff + << ", relative_diff: " << relative_diff + << ", FP16_MAX_DIFF: " << FP16_MAX_DIFF; + } + } #ifdef BOXCODER_FP16_LOOP_TEST - } // axis - } // code_type - } // norm - } // m - } // n + } // axis + } // code_type + } // norm + } // m +} // n #else // nothing to do. #endif