From d1328b25a1e6f800c85c7a80a6289fe2bf23169a Mon Sep 17 00:00:00 2001
From: Yuan Shuai
Date: Thu, 21 May 2020 04:45:39 -0500
Subject: [PATCH] [LITE][PROFILE] Enhance ARM CPU profiler with real backend
 kernel name (#3674)

* [LITE][PROFILE] Enhance ARM CPU profiler with real backend kernel. test=develop
---
 lite/core/profile/profiler.cc             |  6 ++--
 lite/kernels/arm/conv_compute.h           | 10 ++++++
 lite/kernels/arm/conv_depthwise.cc        | 42 +++++++++++++++++++++++
 lite/kernels/arm/conv_depthwise.h         | 10 ++++++
 lite/kernels/arm/conv_direct.cc           | 42 +++++++++++++++++++++++
 lite/kernels/arm/conv_direct.h            | 10 ++++++
 lite/kernels/arm/conv_gemmlike.cc         | 42 +++++++++++++++++++++++
 lite/kernels/arm/conv_gemmlike.h          | 10 ++++++
 lite/kernels/arm/conv_transpose_compute.h |  9 +++++
 lite/kernels/arm/conv_winograd.cc         | 17 +++++++++
 lite/kernels/arm/conv_winograd.h          |  8 +++++
 lite/operators/conv_op.h                  | 16 +++++----
 12 files changed, 213 insertions(+), 9 deletions(-)

diff --git a/lite/core/profile/profiler.cc b/lite/core/profile/profiler.cc
index 3c50585ef2..2a23d58187 100644
--- a/lite/core/profile/profiler.cc
+++ b/lite/core/profile/profiler.cc
@@ -112,7 +112,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
   if (!concise) {
     ss << " " << setw(24) << left << "KernelName";
   }
-  ss << " " << setw(16) << left << "Remark";
+  ss << " " << setw(26) << left << "Remark";
   if (!concise) {
     ss << " " << setw(15) << left << "InDim"
        << " " << setw(15) << left << "FilterDim"
@@ -185,7 +185,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
       // clang-format off
       ss << setw(20) << left << fixed << item.first.op_type
          << " " << setw(30) << left << fixed << item.first.kernel_attr
-         << " " << setw(16) << left << fixed << item.first.remark
+         << " " << setw(26) << left << fixed << item.first.remark
          << " " << setw(7) << left << fixed << setprecision(3)
          << item.second.avg
          << " " << setw(7) << left << fixed << setprecision(3)
@@ -244,7 +244,7 @@
          << " " << setw(30) << left << fixed << unit.Character().kernel_attr
          << " " << setw(24) << left << fixed
          << unit.Character().kernel_func_name
-         << " " << setw(16) << left << fixed << unit.Character().remark
+         << " " << setw(26) << left << fixed << unit.Character().remark
          << " " << setw(15) << left << fixed << unit.Character().input_shape
          << " " << setw(15) << left << fixed << unit.Character().filter_shape
          << " " << setw(15) << left << fixed << unit.Character().output_shape
diff --git a/lite/kernels/arm/conv_compute.h b/lite/kernels/arm/conv_compute.h
index 267b4746a3..d01e2b1e03 100644
--- a/lite/kernels/arm/conv_compute.h
+++ b/lite/kernels/arm/conv_compute.h
@@ -15,6 +15,9 @@
 #pragma once
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/kernel.h"
+#ifdef LITE_WITH_PROFILE
+#include "lite/core/profile/profiler.h"
+#endif
 
 namespace paddle {
 namespace lite {
@@ -36,6 +39,13 @@ class ConvCompute : public KernelLite<TARGET(kARM), Ptype> {
     impl_->Run();
   }
 
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    impl_->SetProfileRuntimeKernelInfo(ch);
+  }
+#endif
+
   ~ConvCompute() {
     if (impl_ != nullptr) {
       delete impl_;
diff --git a/lite/kernels/arm/conv_depthwise.cc b/lite/kernels/arm/conv_depthwise.cc
index 6f641d0f27..907a915a37 100644
--- a/lite/kernels/arm/conv_depthwise.cc
+++ b/lite/kernels/arm/conv_depthwise.cc
@@ -50,6 +50,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
       flag_trans_weights_ = true;
     }
     impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_fp32";
+#endif
   } else if (kw == 5) {
     // VLOG(5) << "invoke 5x5 dw conv fp32";
     auto strides = param.strides;
@@ -67,6 +70,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
           w_data_in, w_data, oc, 1, cblock, kh * kw);
       flag_trans_weights_ = true;
       impl_ = lite::arm::math::conv_depthwise_5x5_fp32;
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_depthwise_5x5_fp32";
+#endif
     } else {
       LOG(FATAL)
           << "5x5 depthwise conv only support stride == 1 or stride == 2";
@@ -103,6 +109,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 3x3 dw conv int8 kernel fp32 out";
     impl_ = lite::arm::math::conv_depthwise_3x3_int8_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_int8_fp32";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -113,6 +122,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 5x5 dw conv int8 kernel fp32 out";
     impl_ = lite::arm::math::conv_depthwise_5x5_int8_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_5x5_int8_fp32";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -162,6 +174,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 3x3 dw conv int8 kernel int8 out";
     impl_ = lite::arm::math::conv_depthwise_3x3_int8_int8;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_int8_int8";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -172,6 +187,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 5x5 dw conv int8 kernel int8 out";
     impl_ = lite::arm::math::conv_depthwise_5x5_int8_int8;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_5x5_int8_int8";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -183,6 +201,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -225,6 +251,14 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
         w_scale_.data());
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -267,6 +301,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
         w_scale_.data());
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
diff --git a/lite/kernels/arm/conv_depthwise.h b/lite/kernels/arm/conv_depthwise.h
index e1e70355f6..6cbf873a6c 100644
--- a/lite/kernels/arm/conv_depthwise.h
+++ b/lite/kernels/arm/conv_depthwise.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/core/context.h"
@@ -48,6 +49,15 @@ class DepthwiseConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void Run();
 
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+
+  std::string kernel_func_name_{"NotImplForConvDw"};
+#endif
+
  private:
   using param_t = operators::ConvParam;
   Tensor weights_;
diff --git a/lite/kernels/arm/conv_direct.cc b/lite/kernels/arm/conv_direct.cc
index ccf36391e7..8a93344bbc 100644
--- a/lite/kernels/arm/conv_direct.cc
+++ b/lite/kernels/arm/conv_direct.cc
@@ -19,6 +19,14 @@ namespace lite {
 namespace kernels {
 namespace arm {
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -62,6 +70,9 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                             b_data,
                                             param,
                                             &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_fp32";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_fp32(i_data,
                                             o_data,
@@ -76,9 +87,20 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                             b_data,
                                             param,
                                             &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_fp32";
+#endif
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -117,6 +139,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_int8";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_int8(i_data,
                                             o_data,
@@ -132,9 +157,20 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_int8";
+#endif
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
@@ -173,6 +209,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_int8";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_int8(i_data,
                                             o_data,
@@ -188,6 +227,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_int8";
+#endif
   }
 }
 
diff --git a/lite/kernels/arm/conv_direct.h b/lite/kernels/arm/conv_direct.h
index cd90c4d6c5..72b5e4cf81 100644
--- a/lite/kernels/arm/conv_direct.h
+++ b/lite/kernels/arm/conv_direct.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/context.h"
@@ -180,6 +181,15 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
 
   virtual void Run();
 
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+
+  std::string kernel_func_name_{"NotImplForConvDirect"};
+#endif
+
   /// todo, support inplace weights transform
  protected:
   Tensor weights_;
diff --git a/lite/kernels/arm/conv_gemmlike.cc b/lite/kernels/arm/conv_gemmlike.cc
index 4b1f578869..a40e830554 100644
--- a/lite/kernels/arm/conv_gemmlike.cc
+++ b/lite/kernels/arm/conv_gemmlike.cc
@@ -81,6 +81,14 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -111,12 +119,26 @@ void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   if (flag_1x1gemm_) {
     lite::arm::math::conv1x1s1_gemm(
         din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm(
         din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm";
+#endif
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -159,6 +181,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm_int8";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm_int8(din,
                                            dout,
@@ -174,9 +199,20 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm_int8";
+#endif
   }
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
@@ -219,6 +255,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm_int8";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm_int8(din,
                                            dout,
@@ -234,6 +273,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm_int8";
+#endif
   }
 }
 
diff --git a/lite/kernels/arm/conv_gemmlike.h b/lite/kernels/arm/conv_gemmlike.h
index 5e59eb8d17..1713196343 100644
--- a/lite/kernels/arm/conv_gemmlike.h
+++ b/lite/kernels/arm/conv_gemmlike.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/backends/arm/math/funcs.h"
@@ -94,6 +95,15 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void Run();
 
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+
+  std::string kernel_func_name_{"NotImplForConvGemm"};
+#endif
+
   /// todo, support inplace weights transform
  protected:
   using param_t = operators::ConvParam;
diff --git a/lite/kernels/arm/conv_transpose_compute.h b/lite/kernels/arm/conv_transpose_compute.h
index 7b781cdd52..5aa8f61c4e 100644
--- a/lite/kernels/arm/conv_transpose_compute.h
+++ b/lite/kernels/arm/conv_transpose_compute.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <string>
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/kernel.h"
 #include "lite/operators/conv_transpose_op.h"
@@ -33,6 +34,14 @@ class Conv2DTransposeCompute
 
   ~Conv2DTransposeCompute() = default;
 
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvTranspose"};
+#endif
+
  protected:
   int workspace_size_{0};
 };
diff --git a/lite/kernels/arm/conv_winograd.cc b/lite/kernels/arm/conv_winograd.cc
index d0880e51de..af428fd9c0 100644
--- a/lite/kernels/arm/conv_winograd.cc
+++ b/lite/kernels/arm/conv_winograd.cc
@@ -94,6 +94,14 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
   ReInitWhenNeeded();
 }
 
+#ifdef LITE_WITH_PROFILE
+template <>
+void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
+
 template <>
 void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -130,6 +138,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                         b_data,
                                         param,
                                         &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_compute_6x6_3x3";
+#endif
   } else {
     int tile_block = 8;
     int block_count =
@@ -148,6 +159,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                           b_data,
                                           param,
                                           &ctx);
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_compute_2x2_3x3";
+#endif
     } else {
       lite::arm::math::conv_compute_2x2_3x3_small(i_data,
                                                   o_data,
@@ -162,6 +176,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                                   b_data,
                                                   param,
                                                   &ctx);
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_compute_2x2_3x3_small";
+#endif
     }
   }
 }
diff --git a/lite/kernels/arm/conv_winograd.h b/lite/kernels/arm/conv_winograd.h
index 1a184ac0cc..1cb4d69acb 100644
--- a/lite/kernels/arm/conv_winograd.h
+++ b/lite/kernels/arm/conv_winograd.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <vector>
+#include <string>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/core/context.h"
 #include "lite/core/kernel.h"
@@ -34,6 +35,13 @@ class WinogradConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void ReInitWhenNeeded();
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvWino"};
+#endif
 
  protected:
   using param_t = operators::ConvParam;
diff --git a/lite/operators/conv_op.h b/lite/operators/conv_op.h
index 993b0d6e71..c3e375e2e4 100644
--- a/lite/operators/conv_op.h
+++ b/lite/operators/conv_op.h
@@ -22,6 +22,9 @@
 #include "lite/core/tensor.h"
 #include "lite/operators/op_params.h"
 #include "lite/utils/all.h"
+#ifdef LITE_WITH_PROFILE
+#include "lite/api/paddle_place.h"
+#endif
 
 namespace paddle {
 namespace lite {
@@ -44,12 +47,13 @@ class ConvOpLite : public OpLite {
     ch->input_shape = ch->DimToStr(input_dims);
     ch->output_shape = ch->DimToStr(output_dims);
     ch->filter_shape = ch->DimToStr(filter_dims);
-    ch->remark = std::to_string(filter_dims[2]) + "x" +
-                 std::to_string(filter_dims[3]) + "p" +
-                 std::to_string((*param_.paddings)[0]) + "s" +
-                 std::to_string(param_.strides[0]) + "g" +
-                 std::to_string(param_.groups) + "d" +
-                 std::to_string((*param_.dilations)[0]);
+    ch->remark =
+        std::to_string(filter_dims[2]) + "x" + std::to_string(filter_dims[3]) +
+        "p" + std::to_string((*param_.paddings)[0]) + "s" +
+        std::to_string(param_.strides[0]) + "g" +
+        std::to_string(param_.groups) + "d" +
+        std::to_string((*param_.dilations)[0]) + (param_.bias ? "Bias" : "") +
+        ActivationTypeToStr(param_.activation_param.active_type);
     // MACs = 2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group
     // GMACs = 1e-9f * MACs
     // GMACPS = 1e-6f * MACs / predict_ms
-- 
GitLab