diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h
index ee581bf5e126f07fcdb1edeb9ab5b570df0c2ade..f29ca5f8409779a2eb9b6b98b91c2c1e3af3c06d 100644
--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -52,6 +52,24 @@ static bool write_tensorfile(const Tensor* tensor, const std::string& locate) {
   return true;
 }
 
+static bool write_precision_summary_tofile(const std::string& summary,
+                                           const std::string& log_dir = "") {
+  if (log_dir == "") {
+    LOG(INFO) << "The `log_dir` of the precision summary file is not set, "
+              << "skip writing.";
+    return false;
+  }
+  FILE* fp = fopen(log_dir.c_str(), "a");
+  if (fp == nullptr) {
+    LOG(INFO) << "Open precision summary file:" << log_dir << " failed.";
+    return false;
+  } else {
+    fprintf(fp, "%s\n", summary.c_str());
+  }
+  fclose(fp);
+  return true;
+}
+
 class PrecisionProfiler {
  public:
   // TODO(ysh329): need to remove `explicit PrecisionProfiler`
@@ -67,7 +85,7 @@ class PrecisionProfiler {
     using std::left;
     using std::fixed;
     STL::stringstream ss;
-    ss << "========================================= "
+    ss << "\n\n========================================= "
        << "Detailed Precision Profiler Summary "
        << "=========================================" << std::endl;
     ss << setw(45) << left << "operator:(kernel_info)"
@@ -77,6 +95,8 @@ class PrecisionProfiler {
        << " " << setw(15) << left << "std_deviation"
        << " " << setw(15) << left << "ave_grow_rate*" << std::endl;
 
+    // write summary header to the file at path `log_dir_`
+    write_precision_summary_tofile(ss.str(), log_dir_);
     return ss.str();
   }
 
@@ -194,6 +214,7 @@ class PrecisionProfiler {
       }
 #ifdef LITE_WITH_OPENCL
     } else if (target_type == TARGET(kOpenCL)) {
+      CLRuntime::Global()->command_queue().finish();
       switch (layout_type) {
         case DATALAYOUT(kImageDefault): {
           paddle::lite::CLImageConverterDefault default_convertor;
@@ -360,8 +381,12 @@ class PrecisionProfiler {
         }
       }
     }
+    write_precision_summary_tofile(ss.str(), log_dir_);
     return ss.str();
   }
+
+ private:
+  std::string log_dir_{"/storage/emulated/0/precision.log"};
 };
 
 }  // namespace profile
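For reviewers who want to exercise the new append-and-log pattern outside the Lite build, here is a minimal standalone sketch of what `write_precision_summary_tofile` does. It uses `std::ofstream` and `std::cerr` as stand-ins for the C `fopen`/`LOG(INFO)` pair in the patch; the helper name and the paths are illustrative, not part of this change:

```cpp
#include <fstream>
#include <iostream>
#include <string>

// Illustrative stand-in for write_precision_summary_tofile(): appends one
// summary string to `log_path`, returning false when the path is unset or
// the file cannot be opened (mirrors the fopen(..., "a") logic above).
static bool append_summary_line(const std::string& summary,
                                const std::string& log_path) {
  if (log_path.empty()) {
    std::cerr << "precision summary log path is not set, skip writing\n";
    return false;
  }
  std::ofstream ofs(log_path, std::ios::app);  // append mode, like fopen "a"
  if (!ofs) {
    std::cerr << "open precision summary file " << log_path << " failed\n";
    return false;
  }
  ofs << summary << '\n';
  return true;  // ofstream closes on destruction, like the explicit fclose
}

int main() {
  // Hypothetical path; the patch defaults log_dir_ to
  // /storage/emulated/0/precision.log, a world-writable location on Android.
  append_summary_line("conv2d: mean=0.013 std=0.48", "/tmp/precision.log");
}
```

Appending rather than truncating is deliberate: the header and each per-op row are written as separate calls, so the summary accumulates across a whole inference run.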
diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc
index 861b8b2fe90ecbb7cb5504e81b5d678bdcf02bf2..9e5f365fdb5f8f678af4da189587d30b41bd0b41 100644
--- a/lite/kernels/opencl/conv_image_compute.cc
+++ b/lite/kernels/opencl/conv_image_compute.cc
@@ -400,16 +400,28 @@ void ConvImageCompute::PrepareForRun() {
   VLOG(1) << "kernel_func_names_[0]:" << kernel_func_names_[0]
           << " kernel_func_paths_[0]:" << kernel_func_paths_[0];
 
+  // build options
   std::string build_options_single(" -DCL_DTYPE_half");
   // relu options
-  if (relu_fused) {
-    build_options_single += " -DRELU";
-  } else if (param.activation_param.active_type ==
-             lite_api::ActivationType::kRelu6) {
-    build_options_single += " -DRELU6";
-  } else {
-    // do nothing, may add more activation fuse
+  VLOG(3) << "relu_fused:" << relu_fused
+          << " param.activation_param.active_type:"
+          << static_cast<int>(param.activation_param.active_type)
+          << " param.activation_param.has_active:"
+          << param.activation_param.has_active;
+  if (param.activation_param.has_active) {
+    if (param.activation_param.active_type ==
+        lite_api::ActivationType::kRelu) {  // Note: checking `relu_fused`
+                                            // would also work here
+      build_options_single += " -DRELU";
+    } else if (param.activation_param.active_type ==
+               lite_api::ActivationType::kRelu6) {
+      build_options_single += " -DRELU6";
+    } else {
+      LOG(FATAL) << "Unsupported activation type:"
+                 << static_cast<int>(param.activation_param.active_type);
+    }
   }
+
   // bias options
   const bool has_bias = param.bias != nullptr;
   const bool is_element_wise_bias =
@@ -648,7 +660,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
   int filter_height = filter_dims[2];
   int filter_channel = filter_dims[1];
   auto out_image_shape = InitImageDimInfoWith(output_dims);
-  auto* out_image = param.output->mutable_data<float, cl::Image2D>(
+  auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
       out_image_shape["width"], out_image_shape["height"]);
 
   const bool has_bias = param.bias != nullptr;
@@ -724,7 +736,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
 
   const cl::Image2D* bias_image = nullptr;
   if (has_bias) {
-    bias_image = bias_gpu_image_->data<float, cl::Image2D>();
+    bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
   }
 
   auto& context = ctx_->As<OpenCLContext>();
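The activation handling above boils down to a small mapping from the fused `ActivationType` to an OpenCL `-D` build macro, with a hard failure for anything the kernels do not implement. A minimal sketch of that dispatch, assuming simplified enum values and a hypothetical helper name (this is not the Lite API, and `throw` stands in for `LOG(FATAL)`):

```cpp
#include <stdexcept>
#include <string>

// Simplified mirror of lite_api::ActivationType for illustration only.
enum class ActivationType { kIndentity = 0, kRelu = 1, kRelu6 = 2, kPRelu = 3 };

// Returns the build option fragment for a fused activation. Only kRelu and
// kRelu6 have kernel-side support; any other fused type is a hard error at
// kernel-build time rather than a silently dropped activation.
std::string ActivationBuildOption(bool has_active, ActivationType type) {
  if (!has_active) return "";  // no fused activation requested
  switch (type) {
    case ActivationType::kRelu:
      return " -DRELU";
    case ActivationType::kRelu6:
      return " -DRELU6";
    default:
      throw std::runtime_error("Unsupported activation type: " +
                               std::to_string(static_cast<int>(type)));
  }
}

int main() {
  std::string build_options(" -DCL_DTYPE_half");
  build_options += ActivationBuildOption(true, ActivationType::kRelu6);
  // build_options is now " -DCL_DTYPE_half -DRELU6"
}
```

Keying the dispatch on `has_active`/`active_type` instead of the legacy `fuse_relu` flag is what lets relu6 (and future fusions) share one code path; the tests below set both fields so either convention works.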
diff --git a/lite/kernels/opencl/conv_image_compute_test.cc b/lite/kernels/opencl/conv_image_compute_test.cc
index 73a5ee3d4980cc46dec20ec7948f1ae38cd1eca1..95a0f7f4b6bfd0f6e76367b24b184350456e7e70 100644
--- a/lite/kernels/opencl/conv_image_compute_test.cc
+++ b/lite/kernels/opencl/conv_image_compute_test.cc
@@ -197,15 +197,23 @@ TEST(conv2d, compute_image2d_1x1) {
         if (bias_flag) {
           param.bias = &bias;
         }
+
         if (relu_flag == "relu") {
-          param.fuse_relu = true;
+          param.fuse_relu = true;  // relu only
+          param.activation_param.has_active = true;
+          param.activation_param.active_type =
+              lite_api::ActivationType::kRelu;
         } else if (relu_flag == "None") {
           param.fuse_relu = false;
+          param.activation_param.has_active = false;
         } else if (relu_flag == "relu6") {
           param.activation_param.Relu_clipped_coef = 6.f;
           param.activation_param.has_active = true;
           param.activation_param.active_type =
               lite_api::ActivationType::kRelu6;
+        } else {
+          param.fuse_relu = false;  // relu only
+          param.activation_param.has_active = false;
         }
 
         std::vector<int> paddings = {pad, pad, pad, pad};
@@ -337,7 +345,7 @@ TEST(conv2d, compute_image2d_1x1) {
           SHADOW_LOG << "(" << i << ")" << Half2Float(x_image_v[i]);
         }
         // auto* filter_image2d =
-        //     filter.mutable_data<float, cl::Image2D>(
+        //     filter.mutable_data<half_t, cl::Image2D>(
         //         filter_image_width,
         //         filter_image_height,
         //         filter_image_v.data());
@@ -563,15 +571,23 @@ const int stride = 2;
         if (bias_flag) {
           param.bias = &bias;
         }
+
         if (relu_flag == "relu") {
-          param.fuse_relu = true;
+          param.fuse_relu = true;  // relu only
+          param.activation_param.has_active = true;
+          param.activation_param.active_type =
+              lite_api::ActivationType::kRelu;
         } else if (relu_flag == "None") {
           param.fuse_relu = false;
+          param.activation_param.has_active = false;
         } else if (relu_flag == "relu6") {
           param.activation_param.Relu_clipped_coef = 6.f;
           param.activation_param.has_active = true;
           param.activation_param.active_type =
               lite_api::ActivationType::kRelu6;
+        } else {
+          param.fuse_relu = false;  // relu only
+          param.activation_param.has_active = false;
         }
 
         std::vector<int> paddings = {pad, pad, pad, pad};
@@ -912,14 +928,21 @@ TEST(conv2d, compute_image2d_5x5) {
           param.bias = &bias;
         }
         if (relu_flag == "relu") {
-          param.fuse_relu = true;
+          param.fuse_relu = true;  // relu only
+          param.activation_param.has_active = true;
+          param.activation_param.active_type =
+              lite_api::ActivationType::kRelu;
         } else if (relu_flag == "None") {
           param.fuse_relu = false;
+          param.activation_param.has_active = false;
         } else if (relu_flag == "relu6") {
           param.activation_param.Relu_clipped_coef = 6.f;
           param.activation_param.has_active = true;
           param.activation_param.active_type =
               lite_api::ActivationType::kRelu6;
+        } else {
+          param.fuse_relu = false;  // relu only
+          param.activation_param.has_active = false;
         }
 
         std::vector<int> paddings = {pad, pad, pad, pad};
@@ -1244,16 +1267,25 @@ TEST(conv2d, compute_image2d_7x7) {
         if (bias_flag) {
           param.bias = &bias;
         }
+
         if (relu_flag == "relu") {
-          param.fuse_relu = true;
+          param.fuse_relu = true;  // relu only
+          param.activation_param.has_active = true;
+          param.activation_param.active_type =
+              lite_api::ActivationType::kRelu;
         } else if (relu_flag == "None") {
           param.fuse_relu = false;
+          param.activation_param.has_active = false;
         } else if (relu_flag == "relu6") {
           param.activation_param.Relu_clipped_coef = 6.f;
           param.activation_param.has_active = true;
           param.activation_param.active_type =
               lite_api::ActivationType::kRelu6;
+        } else {
+          param.fuse_relu = false;  // relu only
+          param.activation_param.has_active = false;
         }
+
         std::vector<int> paddings = {pad, pad, pad, pad};
         std::vector<int> dilations = {dilation, dilation};
 
diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h
index cfdb0d5389cccda03d304216c4e0a6329e5dc86f..599da099d7d6e2cdee38ed64b8b3ece8cf8582c9 100644
--- a/lite/operators/op_params.h
+++ b/lite/operators/op_params.h
@@ -340,7 +340,7 @@ struct ConcatParam : ParamBase {
 struct ActivationParam : ParamBase {
   const lite::Tensor* X{};
   lite::Tensor* Out{};
-  lite_api::ActivationType active_type;
+  lite_api::ActivationType active_type{lite_api::ActivationType::kIndentity};
   bool has_active{false};
   float Leaky_relu_alpha{0};   // leaky_relu param
   float Relu_clipped_coef{6};  // relu_clipped param
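The `op_params.h` change is the safety net for everything above: `ConvParam` embeds an `ActivationParam` by value, so before this patch any code path that never assigned `active_type` read an enum with no defined value, and the new `has_active`-guarded dispatch could have branched on garbage. A compilable sketch of the hazard and the fix, using simplified stand-in structs rather than the real Lite types:

```cpp
#include <iostream>

// Simplified mirror of lite_api::ActivationType for illustration only.
enum class ActivationType { kIndentity = 0, kRelu = 1, kRelu6 = 2 };

// Before the patch: no default member initializer, so a default-initialized
// local carries an indeterminate active_type.
struct ActivationParamOld {
  ActivationType active_type;  // indeterminate unless explicitly set
  bool has_active{false};
};

// After the patch: kIndentity is an explicit "no activation" default,
// consistent with has_active{false}.
struct ActivationParamNew {
  ActivationType active_type{ActivationType::kIndentity};
  bool has_active{false};
};

int main() {
  ActivationParamNew p;  // the well-defined default state the tests rely on
  // Tests then opt in explicitly, as in the relu_flag branches above:
  p.has_active = true;
  p.active_type = ActivationType::kRelu6;
  std::cout << static_cast<int>(p.active_type) << "\n";  // prints 2
}
```

(`kIndentity` is spelled that way in the upstream `lite_api::ActivationType` enum, so the patch keeps the existing identifier.) This is also why every `relu_flag` branch in the tests now sets `has_active` explicitly, including the new catch-all `else`: the pair (`has_active`, `active_type`) is always left in a consistent state regardless of which fusion the test case requests.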