From 802951e7bbd30c302fd3c240ceb35911905e5d25 Mon Sep 17 00:00:00 2001
From: Yuan Shuai
Date: Thu, 4 Jun 2020 15:52:50 +0800
Subject: [PATCH] [LITE][OPENCL] Add local work size, global work size for opencl profiler; fix bug for variable inference pass. test=develop (#3741)

---
Reviewer notes (this section is not part of the commit message):
 - OpCharacter::NDRangeToStr no longer emits a LOG(INFO) line per component;
   the helper runs twice per profiled kernel, so the debug output flooded the
   log. The returned "x,y,z" string is unchanged.
 - This copy of the patch lost every span between '<' and '>' (author e-mail,
   the first #include context line in profiler.h, and the heading/context of
   the conv_image_compute.h hunk). Restore those lines from the upstream
   commit before applying. Context indentation was reconstructed and should
   be checked against the tree (or applied with --ignore-whitespace).

 lite/core/mir/variable_place_inference_pass.h |  3 ++-
 lite/core/profile/profiler.cc                 |  9 ++++++++-
 lite/core/profile/profiler.h                  | 18 ++++++++++++++++++
 lite/kernels/opencl/conv_image_compute.h      |  2 ++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/lite/core/mir/variable_place_inference_pass.h b/lite/core/mir/variable_place_inference_pass.h
index 130c49ddf6..d9f420cfad 100644
--- a/lite/core/mir/variable_place_inference_pass.h
+++ b/lite/core/mir/variable_place_inference_pass.h
@@ -174,7 +174,8 @@ class VariablePlaceInferencePass : public DebugPass {
           // If is quantization, infer the Int8 type.
           if (type->precision() == PRECISION(kInt8)) {
             x_out->AsArg().type = type;
-          } else if (type->precision() == PRECISION(kFP16)) {
+          } else if (type->precision() == PRECISION(kFP16) &&
+                     type->target() != TARGET(kOpenCL)) {
             x_out->AsArg().type = type;
           } else {
             PrecisionType tmp_ptype = x_out->AsArg().type->precision();
diff --git a/lite/core/profile/profiler.cc b/lite/core/profile/profiler.cc
index 01f216e431..e098513883 100644
--- a/lite/core/profile/profiler.cc
+++ b/lite/core/profile/profiler.cc
@@ -164,6 +164,10 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
        << " " << setw(9) << left << "clMin(ms)"
        << " " << setw(9) << left << "clMax(ms)"
        << " " << setw(9) << left << "clAvg(%)";
+    if (!concise) {
+      ss << " " << setw(12) << left << "GlobalWorkSize"
+         << " " << setw(12) << left << "LocalWorkSize";
+    }
 #endif
     ss << std::endl;
 
@@ -301,7 +305,10 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
            << cl_times.Avg(w) << " " << setw(9) << left << fixed
            << setprecision(3) << cl_times.Min(w) << " " << setw(9) << left
            << fixed << setprecision(3) << cl_times.Max(w) << " " << left
-           << setprecision(2) << cl_percent << "% ";
+           << setprecision(2) << cl_percent << "% "
+           << " " << setw(12) << left << fixed
+           << unit.Character().global_work_size << " " << setw(12) << left
+           << fixed << unit.Character().local_work_size;
 #endif
         ss << std::endl;
       }
diff --git a/lite/core/profile/profiler.h b/lite/core/profile/profiler.h
index b57973da83..152636272a 100644
--- a/lite/core/profile/profiler.h
+++ b/lite/core/profile/profiler.h
@@ -19,6 +19,9 @@
 #include
 #include "lite/core/profile/timer.h"
 #include "lite/core/tensor.h"
+#ifdef LITE_WITH_OPENCL
+#include "lite/backends/opencl/cl_include.h"
+#endif
 
 namespace paddle {
 namespace lite {
@@ -63,6 +66,21 @@ struct OpCharacter {
 
 #ifdef LITE_WITH_OPENCL
   cl::Event cl_event{};
+  std::string global_work_size{"N/A"};
+  std::string local_work_size{"N/A"};
+
+  // Formats the three components of `range` as "x,y,z" for the profiler.
+  // The count is fixed at 3 rather than queried from `range`.
+  // NOTE(review): components past range.dimensions() presumably read as 0.
+  std::string NDRangeToStr(const cl::NDRange& range) {
+    constexpr size_t kRangeDims = 3;
+    std::string range_str;
+    for (size_t i = 0; i < kRangeDims; ++i) {
+      if (i != 0) range_str += ",";
+      range_str += std::to_string(range[i]);
+    }
+    return range_str;
+  }
 #else
   void* cl_event{nullptr};
 #endif
diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h
index be045bb0be..64276a5721 100644
--- a/lite/kernels/opencl/conv_image_compute.h
+++ b/lite/kernels/opencl/conv_image_compute.h
@@ -48,6 +48,8 @@ class ConvImageCompute : public KernelLitekernel_func_name = kernel_func_names_[0];
+    ch->global_work_size = ch->NDRangeToStr(global_work_size_);
+    ch->local_work_size = ch->NDRangeToStr(local_work_size_);
     ch->cl_event = event_; // `event_` defined in `kernel.h`, valid after kernel::Run
   }
-- 
GitLab