diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h
index 324719e744817f58c7a990e45416de6b98bef03e..39213a33cebd05d9cfa50d82cdfb09ad3f7ad637 100644
--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -33,13 +33,14 @@ namespace lite {
 namespace profile {
 
 template <typename dtype>
-static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
+static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
   if (locate.find('/') != std::string::npos) {
-    return;
+    return false;
   }
   FILE* fp = fopen(locate.c_str(), "w");
   if (fp == nullptr) {
     LOG(ERROR) << "file open field " << locate;
+    return false;
   } else {
     const dtype* data = tensor->data<dtype>();
     for (int i = 0; i < tensor->numel(); ++i) {
@@ -47,6 +48,7 @@ static void write_tensorfile(const Tensor* tensor, const std::string& locate) {
     }
   }
   fclose(fp);
+  return true;
 }
 
 class PrecisionProfiler {
@@ -69,9 +71,10 @@
           << "=========================================" << std::endl;
     ss << setw(45) << left << "operator:(kernel_info)"
        << " " << setw(70) << left << "output_tensor_name:(tensor_info)"
-       << " " << setw(15) << left << "tensor_dims"
-       << " " << setw(15) << left << "tensor_mean"
-       << " " << setw(15) << left << "tensor_standard_deviation" << std::endl;
+       << " " << setw(15) << left << "dims"
+       << " " << setw(15) << left << "mean"
+       << " " << setw(15) << left << "std_deviation"
+       << " " << setw(15) << left << "ave_grow_rate*" << std::endl;
     return ss.str();
   }
 
@@ -102,6 +105,17 @@
     return sqrt(variance);
   }
 
+  template <typename T>
+  double compute_average_grow_rate(const T* in, const size_t length) {
+    const double eps = 1e-5;
+    double ave_grow_rate = 0.0f;
+    for (size_t i = 1; i < length; ++i) {
+      ave_grow_rate += (in[i] - in[i - 1]) / (in[i - 1] + eps);
+    }
+    ave_grow_rate /= length;
+    return ave_grow_rate;
+  }
+
   // check if output tensor unused
   bool is_unused(const Tensor* in) {
     if (!in->data<char>()) {
@@ -116,7 +130,9 @@
                                      DataLayoutType layout_type,
                                      double* mean,
                                      double* std_dev,
-                                     std::string name = "inst") {
+                                     double* ave_grow_rate,
+                                     std::string name = "inst",
+                                     bool write_result_to_file = false) {
     std::string unsupported_error_log =
         "Unsupported precision profile for kernel registered on" +
         TargetToStr(target_type) + "/" + PrecisionToStr(precision_type) + "/" +
@@ -127,39 +143,44 @@
     switch (precision_type) {
       case PRECISION(kFloat): {
         auto ptr = in->data<float>();
-        // write_tensorfile<float>(in, name);
         *mean = compute_mean<float>(ptr, in->numel());
         *std_dev =
             compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
+        *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
+        write_result_to_file&& write_tensorfile<float>(in, name);
         return;
       }
       case PRECISION(kAny): {
         auto ptr = in->data<float>();
-        // write_tensorfile<float>(in, name);
         *mean = compute_mean<float>(ptr, in->numel());
         *std_dev =
             compute_standard_deviation<float>(ptr, in->numel(), true, *mean);
+        *ave_grow_rate = compute_average_grow_rate<float>(ptr, in->numel());
+        write_result_to_file&& write_tensorfile<float>(in, name);
         return;
       }
       case PRECISION(kInt8): {
         auto ptr = in->data<int8_t>();
-        // write_tensorfile<int8_t>(in, name);
         *mean = compute_mean<int8_t>(ptr, in->numel());
         *std_dev =
             compute_standard_deviation<int8_t>(ptr, in->numel(), true, *mean);
+        *ave_grow_rate = compute_average_grow_rate<int8_t>(ptr, in->numel());
+        write_result_to_file&& write_tensorfile<int8_t>(in, name);
         return;
       }
       case PRECISION(kInt32): {
        auto ptr = in->data<int32_t>();
-        // write_tensorfile<int32_t>(in, name);
         *mean = compute_mean<int32_t>(ptr, in->numel());
         *std_dev = compute_standard_deviation<int32_t>(
             ptr, in->numel(), true, *mean);
+        *ave_grow_rate = compute_average_grow_rate<int32_t>(ptr, in->numel());
+        write_result_to_file&& write_tensorfile<int32_t>(in, name);
         return;
       }
       default:
         *mean = -333333333333;
         *std_dev = -33333333333;
+        *ave_grow_rate = -33333333333;
         LOG(ERROR) << unsupported_error_log;
         return;
     }
@@ -186,11 +207,13 @@
                                       IoDirection::DtoH);
           default_convertor.ImageToNCHW(
               in_data_v.data(), real_out_v.data(), image_shape, in->dims());
-          // write_tensorfile<float>(in, name);
           CHECK(real_out_v.size() == in->numel());
           *mean = compute_mean<float>(real_out_v.data(), real_out_v.size());
           *std_dev = compute_standard_deviation<float>(
               real_out_v.data(), in->numel(), true, *mean);
+          *ave_grow_rate = compute_average_grow_rate<float>(real_out_v.data(),
+                                                            real_out_v.size());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         case DATALAYOUT(kNCHW): {
@@ -203,11 +226,15 @@
           *mean = compute_mean<float>(in_data_v.data(), in->numel());
           *std_dev = compute_standard_deviation<float>(
               in_data_v.data(), in->numel(), true, *mean);
+          *ave_grow_rate =
+              compute_average_grow_rate<float>(in_data_v.data(), in->numel());
+          write_result_to_file&& write_tensorfile<float>(in, name);
           return;
         }
         default:
           *mean = -222222222222;
           *std_dev = -22222222222;
+          *ave_grow_rate = -22222222222;
           LOG(ERROR) << unsupported_error_log;
           return;
       }
@@ -215,6 +242,7 @@
     } else {
       *mean = -111111111111;
       *std_dev = -11111111111;
+      *ave_grow_rate = -11111111111;
       LOG(ERROR) << unsupported_error_log;
       return;
     }
@@ -225,6 +253,7 @@
     using std::left;
     using std::fixed;
     STL::stringstream ss;
+    bool write_result_to_file = false;
 
     VLOG(1) << ">> Running kernel: " << inst->op()->op_info()->Repr()
             << " registered on " << TargetToStr(inst->kernel()->target()) << "/"
@@ -252,8 +281,10 @@
             op_scope->FindVar(out_name)->GetMutable<Tensor>();
         double mean = -999999;
         double std_dev = -100000;
+        double ave_grow_rate = 99999;
         std::string mean_str{"unused"};
         std::string std_dev_str{"unused"};
+        std::string ave_grow_rate_str{"unused"};
 
         if (!is_unused(tout)) {
           compute_tensor_precision_info(tout,
@@ -262,9 +293,12 @@
                                         type->layout(),
                                         &mean,
                                         &std_dev,
-                                        out_name);
-          mean_str = paddle::lite::to_string(mean);
-          std_dev_str = paddle::lite::to_string(std_dev);
+                                        &ave_grow_rate,
+                                        out_name,
+                                        write_result_to_file);
+          mean_str = std::to_string(mean);
+          std_dev_str = std::to_string(std_dev);
+          ave_grow_rate_str = std::to_string(ave_grow_rate);
         }
         std::string kernel_info = op_name + ":" + kernel_place;
         std::string output_arg_info = out_name + ":" +
@@ -275,7 +309,8 @@
         ss << setw(45) << left << kernel_info << " " << setw(70) << left
            << output_arg_info << " " << setw(15) << left << tout->dims() << " "
            << setw(15) << left << mean_str << " " << setw(15) << left
-           << std_dev_str << std::endl;
+           << std_dev_str << " " << setw(15) << left << ave_grow_rate_str
+           << std::endl;
       } else if (type->IsTensorList()) {
         auto touts =
             op_scope->FindVar(out_name)->GetMutable<std::vector<Tensor>>();
@@ -283,8 +318,10 @@
           const Tensor* tout = &t;
           double mean = -999999;
           double std_dev = -100000;
+          double ave_grow_rate = 99999;
           std::string mean_str{"unused"};
           std::string std_dev_str{"unused"};
+          std::string ave_grow_rate_str{"unused"};
 
           if (!is_unused(tout)) {
             compute_tensor_precision_info(tout,
@@ -293,9 +330,12 @@
                                           type->layout(),
                                           &mean,
                                           &std_dev,
-                                          out_name);
-            mean_str = paddle::lite::to_string(mean);
-            std_dev_str = paddle::lite::to_string(std_dev);
+                                          &ave_grow_rate,
+                                          out_name,
+                                          write_result_to_file);
+            mean_str = std::to_string(mean);
+            std_dev_str = std::to_string(std_dev);
+            ave_grow_rate_str = std::to_string(ave_grow_rate);
           }
           std::string kernel_info = op_name + ":" + kernel_place;
           std::string output_arg_info = out_name + ":" +
@@ -306,7 +346,8 @@
           ss << setw(45) << left << kernel_info << " " << setw(70) << left
              << output_arg_info << " " << setw(15) << left << tout->dims()
              << " " << setw(15) << left << mean_str << " " << setw(15) << left
-             << std_dev_str << std::endl;
+             << std_dev_str << " " << setw(15) << left << ave_grow_rate_str
+             << std::endl;
         }
       }
     }
diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc
index 097ed8d62dc92ac16dfc7477496a412e759d41a9..78a025566f24cb604910eb3766cb05c8647e1e03 100644
--- a/lite/kernels/opencl/elementwise_mul_image_compute.cc
+++ b/lite/kernels/opencl/elementwise_mul_image_compute.cc
@@ -56,7 +56,7 @@ class ElementwiseMulImageCompute
       } else {
         kernel_func_name_ = "channel_mul_d2_hw";
       }
-    } else if (y_dims.size() == 4) {
+    } else if (y_dims.size() == 4 || x_dims.size() == 4) {
       kernel_func_name_ = "channel_mul_d4";
     } else {
       LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
@@ -172,6 +172,18 @@ class ElementwiseMulImageCompute
         status = kernel.setArg(++arg_idx, static_cast<const int>(y_tensor_h));
         CL_CHECK_FATAL(status);
       }
+    } else if (x_dims.size() == 4) {
+      auto tensor_w = y_dims[y_dims.size() - 1];
+      VLOG(4) << "tensor_w:" << tensor_w;
+      // kernel: channel_mul_d4
+      cl_int status = kernel.setArg(arg_idx, *y_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, *x_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, *out_img);
+      CL_CHECK_FATAL(status);
+      status = kernel.setArg(++arg_idx, static_cast<const int>(tensor_w));
+      CL_CHECK_FATAL(status);
     } else {
       LOG(FATAL) << "ElementwiseMul not supported y_dims.size():"
                  << y_dims.size();
diff --git a/lite/kernels/opencl/pad2d_image_compute_test.cc b/lite/kernels/opencl/pad2d_image_compute_test.cc
index d1e1e3bb4c8fc80fabacff52b66f20387dd7766f..c2371d07f31caf569cfe4b299bf2f88373eb3b9f 100644
--- a/lite/kernels/opencl/pad2d_image_compute_test.cc
+++ b/lite/kernels/opencl/pad2d_image_compute_test.cc
@@ -89,7 +89,7 @@ void pad2d_ref(const float *x_data,
     }
   }
 }
-#define LOOP_TEST
+// #define LOOP_TEST
 // #define PRINT_RESULT
 TEST(pad2d_image2d, compute) {
   LOG(INFO) << "main steps of test: host -> layout(buf2img) -> "
diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc
index 4177be03a5542603a1753295837a9cf402016530..376add226216a57a0868c9c52497b784929a207e 100644
--- a/lite/kernels/opencl/reshape_image_compute.cc
+++ b/lite/kernels/opencl/reshape_image_compute.cc
@@ -203,8 +203,8 @@ REGISTER_LITE_KERNEL(reshape2,
                                       PRECISION(kFP16),
                                       DATALAYOUT(kImageDefault))})
     .BindInput("ShapeTensor", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -221,7 +221,7 @@ REGISTER_LITE_KERNEL(flatten,
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
                                        DATALAYOUT(kImageDefault))})
-    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -239,7 +239,7 @@ REGISTER_LITE_KERNEL(flatten2,
                                       PRECISION(kFP16),
                                       DATALAYOUT(kImageDefault))})
     .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
-    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kOpenCL))})
+    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
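
Note on the new "ave_grow_rate*" column: the helper added above accumulates the length - 1 consecutive relative increments (in[i] - in[i - 1]) / (in[i - 1] + eps) and divides the sum by length, with eps only guarding an exactly-zero predecessor, so the value is a rough drift indicator rather than a strict mean ratio. Because write_tensorfile now returns bool, the patch can gate the dump via short-circuit evaluation (write_result_to_file&& write_tensorfile<float>(in, name);), calling the writer only when the flag is set. A minimal standalone sketch of the statistic, mirroring the patch's helper (the main driver below is illustrative only, not part of the patch):

#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors compute_average_grow_rate from the patch: mean relative step
// between consecutive elements; eps avoids division by an exact zero.
template <typename T>
double compute_average_grow_rate(const T* in, const size_t length) {
  const double eps = 1e-5;
  double ave_grow_rate = 0.0;
  for (size_t i = 1; i < length; ++i) {
    ave_grow_rate += (in[i] - in[i - 1]) / (in[i - 1] + eps);
  }
  ave_grow_rate /= length;  // as in the patch: length, not length - 1
  return ave_grow_rate;
}

int main() {
  std::vector<float> v{1.f, 2.f, 4.f, 8.f};  // each step doubles
  // three ratios of ~1.0 averaged over 4 elements -> ~0.75
  std::printf("ave_grow_rate = %f\n",
              compute_average_grow_rate(v.data(), v.size()));
  return 0;
}

Comparing this column across two runs of the same model is presumably the intended use: the first kernel whose value diverges marks where numeric behavior starts to drift.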
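The elementwise_mul change widens the channel_mul_d4 dispatch so that a 4-D x multiplied by a lower-rank y reuses the d4 kernel (with the y/x image arguments swapped and the width taken from y's last dimension) instead of dying in LOG(FATAL). A hypothetical condensation of the selection logic, keeping only the branches visible in this diff (the real PrepareForRun also handles further lower-rank variants):

#include <cassert>
#include <cstddef>
#include <string>

// Simplified kernel-name dispatch after the patch; non-diff branches elided.
std::string pick_kernel(std::size_t x_rank, std::size_t y_rank) {
  if (y_rank == 2) {
    return "channel_mul_d2_hw";  // one of the d2 variants
  } else if (y_rank == 4 || x_rank == 4) {
    return "channel_mul_d4";  // widened: a 4-D x now also lands here
  }
  return "unsupported";  // previously also the fate of a 4-D x with 3-D y
}

int main() {
  assert(pick_kernel(4, 4) == "channel_mul_d4");
  assert(pick_kernel(4, 3) == "channel_mul_d4");  // new after this patch
  return 0;
}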