Unverified commit 049af076, authored by Yuan Shuai, committed by GitHub

[LITE][PROFILE] Enhance ARM CPU profiler with real backend kernel name (#3674)

* [LITE][PROFILE] Enhance ARM CPU profiler with real backend kernel. test=develop
Parent 0e6ff923
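In brief: each ARM conv kernel now records the name of the backend math function it actually dispatches to in a `kernel_func_name_` member, and the profiler reads it back through a `SetProfileRuntimeKernelInfo(OpCharacter*)` hook, so the summary table can show e.g. `conv_depthwise_3x3_fp32` instead of a generic kernel entry. A minimal sketch of the pattern, using simplified stand-in types (`OpCharacter` and `KernelBase` below are stripped-down stand-ins, not the real Lite declarations):

```cpp
#include <iostream>
#include <string>

// Simplified stand-in; the real lite::profile::OpCharacter carries many
// more fields (op_type, kernel_attr, shapes, remark, ...).
struct OpCharacter {
  std::string kernel_func_name{"NotImpl"};
};

class KernelBase {
 public:
  virtual void Run() = 0;
  // Profiler hook: overwrite ch->kernel_func_name with the backend
  // function this kernel actually dispatched to.
  virtual void SetProfileRuntimeKernelInfo(OpCharacter* ch) = 0;
  virtual ~KernelBase() = default;
};

class DepthwiseConvSketch : public KernelBase {
 public:
  void Run() override {
    // Dispatch picks the real backend kernel; record its name.
    kernel_func_name_ = "conv_depthwise_3x3_fp32";
  }
  void SetProfileRuntimeKernelInfo(OpCharacter* ch) override {
    ch->kernel_func_name = kernel_func_name_;
  }

 private:
  std::string kernel_func_name_{"NotImplForConvDw"};
};

int main() {
  DepthwiseConvSketch k;
  k.Run();
  OpCharacter ch;
  k.SetProfileRuntimeKernelInfo(&ch);
  std::cout << ch.kernel_func_name << "\n";  // prints conv_depthwise_3x3_fp32
}
```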
......@@ -112,7 +112,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
if (!concise) {
ss << " " << setw(24) << left << "KernelName";
}
ss << " " << setw(16) << left << "Remark";
ss << " " << setw(26) << left << "Remark";
if (!concise) {
ss << " " << setw(15) << left << "InDim"
<< " " << setw(15) << left << "FilterDim"
......@@ -185,7 +185,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
// clang-format off
ss << setw(20) << left << fixed << item.first.op_type
<< " " << setw(30) << left << fixed << item.first.kernel_attr
<< " " << setw(16) << left << fixed << item.first.remark
<< " " << setw(26) << left << fixed << item.first.remark
<< " " << setw(7) << left << fixed << setprecision(3)
<< item.second.avg
<< " " << setw(7) << left << fixed << setprecision(3)
......@@ -244,7 +244,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
<< " " << setw(30) << left << fixed << unit.Character().kernel_attr
<< " " << setw(24) << left << fixed
<< unit.Character().kernel_func_name
<< " " << setw(16) << left << fixed << unit.Character().remark
<< " " << setw(26) << left << fixed << unit.Character().remark
<< " " << setw(15) << left << fixed << unit.Character().input_shape
<< " " << setw(15) << left << fixed << unit.Character().filter_shape
<< " " << setw(15) << left << fixed << unit.Character().output_shape
......
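The `Remark` column widens from 16 to 26 characters in all three table layouts because the remark string now carries bias and activation tags (see the `conv_op.h` hunk at the end of this diff). `std::setw` sets only a minimum field width, so an over-long remark such as `3x3p1s1g1d1BiasRelu` (19 characters; the exact activation spelling depends on `ActivationTypeToStr` and is assumed here) would print in full and push every following column out of alignment. A quick illustration:

```cpp
#include <iomanip>
#include <iostream>

int main() {
  // setw is a minimum width only: the 19-char remark overflows a
  // 16-char field and shifts the next column, but fits in 26.
  std::cout << std::setw(16) << std::left << "3x3p1s1g1d1BiasRelu" << "| next\n";
  std::cout << std::setw(26) << std::left << "3x3p1s1g1d1BiasRelu" << "| next\n";
}
```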
......@@ -15,6 +15,9 @@
#pragma once
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/kernel.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif
namespace paddle {
namespace lite {
......@@ -36,6 +39,13 @@ class ConvCompute : public KernelLite<TARGET(kARM), Ptype> {
impl_->Run();
}
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
impl_->SetProfileRuntimeKernelInfo(ch);
}
#endif
~ConvCompute() {
if (impl_ != nullptr) {
delete impl_;
......
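`ConvCompute` itself is only a dispatcher: `PrepareForRun` picks a concrete implementation (depthwise, direct, gemm-like, or winograd) and stores it in `impl_`, and `Run` forwards to it. The profiler hook therefore also forwards to `impl_`, so the summary reports the leaf kernel's function name rather than the wrapper's.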
......@@ -50,6 +50,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
flag_trans_weights_ = true;
}
impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_fp32";
#endif
} else if (kw == 5) {
// VLOG(5) << "invoke 5x5 dw conv fp32";
auto strides = param.strides;
......@@ -67,6 +70,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
w_data_in, w_data, oc, 1, cblock, kh * kw);
flag_trans_weights_ = true;
impl_ = lite::arm::math::conv_depthwise_5x5_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_fp32";
#endif
} else {
LOG(FATAL)
<< "5x5 depthwise conv only support stride == 1 or stride == 2";
......@@ -103,6 +109,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 3x3 dw conv int8 kernel fp32 out";
impl_ = lite::arm::math::conv_depthwise_3x3_int8_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_int8_fp32";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -113,6 +122,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 5x5 dw conv int8 kernel fp32 out";
impl_ = lite::arm::math::conv_depthwise_5x5_int8_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_int8_fp32";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -162,6 +174,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 3x3 dw conv int8 kernel int8 out";
impl_ = lite::arm::math::conv_depthwise_3x3_int8_int8;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_int8_int8";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -172,6 +187,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 5x5 dw conv int8 kernel int8 out";
impl_ = lite::arm::math::conv_depthwise_5x5_int8_int8;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_int8_int8";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -183,6 +201,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -225,6 +251,14 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
w_scale_.data());
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -267,6 +301,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
w_scale_.data());
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/core/context.h"
......@@ -48,6 +49,15 @@ class DepthwiseConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvDw"};
#endif
private:
using param_t = operators::ConvParam;
Tensor weights_;
......
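The in-class default `"NotImplForConvDw"` acts as a sentinel: any dispatch path that forgets to assign `kernel_func_name_` shows up in the profiler summary under the sentinel rather than an empty or stale name, which makes uninstrumented branches easy to spot. The direct, gemm-like, winograd, and transpose kernels below follow the same convention with their own sentinels.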
......@@ -19,6 +19,14 @@ namespace lite {
namespace kernels {
namespace arm {
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -62,6 +70,9 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_fp32";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_fp32(i_data,
o_data,
......@@ -76,9 +87,20 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_fp32";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -117,6 +139,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_int8";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_int8(i_data,
o_data,
......@@ -132,9 +157,20 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_int8";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......@@ -173,6 +209,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_int8";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_int8(i_data,
o_data,
......@@ -188,6 +227,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_int8";
#endif
}
}
......
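Note the placement: `DepthwiseConv` records its function name in `PrepareForRun()`, where the kernel is selected once, while `DirectConv` assigns `kernel_func_name_` inside `Run()`, immediately after the stride check picks `conv_3x3s1_direct_*` or `conv_3x3s2_direct_*`. Either way, the name reflects the branch that actually executed.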
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/context.h"
......@@ -180,6 +181,15 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvDirect"};
#endif
/// todo, support inplace weights transform
protected:
Tensor weights_;
......
......@@ -81,6 +81,14 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -111,12 +119,26 @@ void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
if (flag_1x1gemm_) {
lite::arm::math::conv1x1s1_gemm(
din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm";
#endif
} else {
lite::arm::math::conv_im2col_gemm(
din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -159,6 +181,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm_int8";
#endif
} else {
lite::arm::math::conv_im2col_gemm_int8(din,
dout,
......@@ -174,9 +199,20 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm_int8";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......@@ -219,6 +255,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm_int8";
#endif
} else {
lite::arm::math::conv_im2col_gemm_int8(din,
dout,
......@@ -234,6 +273,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm_int8";
#endif
}
}
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/backends/arm/math/funcs.h"
......@@ -94,6 +95,15 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvGemm"};
#endif
/// todo, support inplace weights transform
protected:
using param_t = operators::ConvParam;
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <string>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/kernel.h"
#include "lite/operators/conv_transpose_op.h"
......@@ -33,6 +34,14 @@ class Conv2DTransposeCompute
~Conv2DTransposeCompute() = default;
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvTranspose"};
#endif
protected:
int workspace_size_{0};
};
......
......@@ -94,6 +94,14 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
ReInitWhenNeeded();
}
#ifdef LITE_WITH_PROFILE
template <>
void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -130,6 +138,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_6x6_3x3";
#endif
} else {
int tile_block = 8;
int block_count =
......@@ -148,6 +159,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_2x2_3x3";
#endif
} else {
lite::arm::math::conv_compute_2x2_3x3_small(i_data,
o_data,
......@@ -162,6 +176,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_2x2_3x3_small";
#endif
}
}
}
......
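`WinogradConv` likewise tags the name at run time, distinguishing the 6x6 output-tile variant, the 2x2 variant, and the `_small` fallback chosen for small feature maps; before this change, all three were indistinguishable in the profiler output.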
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/core/context.h"
#include "lite/core/kernel.h"
......@@ -34,6 +35,13 @@ class WinogradConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void ReInitWhenNeeded();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvWino"};
#endif
protected:
using param_t = operators::ConvParam;
......
......@@ -22,6 +22,9 @@
#include "lite/core/tensor.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
#ifdef LITE_WITH_PROFILE
#include "lite/api/paddle_place.h"
#endif
namespace paddle {
namespace lite {
......@@ -44,12 +47,13 @@ class ConvOpLite : public OpLite {
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->filter_shape = ch->DimToStr(filter_dims);
ch->remark = std::to_string(filter_dims[2]) + "x" +
std::to_string(filter_dims[3]) + "p" +
std::to_string((*param_.paddings)[0]) + "s" +
std::to_string(param_.strides[0]) + "g" +
std::to_string(param_.groups) + "d" +
std::to_string((*param_.dilations)[0]);
ch->remark =
std::to_string(filter_dims[2]) + "x" + std::to_string(filter_dims[3]) +
"p" + std::to_string((*param_.paddings)[0]) + "s" +
std::to_string(param_.strides[0]) + "g" +
std::to_string(param_.groups) + "d" +
std::to_string((*param_.dilations)[0]) + (param_.bias ? "Bias" : "") +
ActivationTypeToStr(param_.activation_param.active_type);
// MACs = 2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group
// GMACs = 1e-9f * MACs
// GMACPS = 1e-6f * MACs / predict_ms
......
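For reference, the remark now encodes filter size, padding, stride, groups, dilation, bias, and activation in one token. A hedged sketch of how the pieces combine (`to_remark` is a hypothetical helper mirroring the assembly above; the activation suffix produced by `ActivationTypeToStr` is an assumption, not verified):

```cpp
#include <iostream>
#include <string>

// Hypothetical helper: builds the remark token for a conv with filter
// kh x kw, padding, stride, groups, dilation, optional bias, activation.
std::string to_remark(int kh, int kw, int pad, int stride, int groups,
                      int dilation, bool bias, const std::string& act) {
  return std::to_string(kh) + "x" + std::to_string(kw) +
         "p" + std::to_string(pad) + "s" + std::to_string(stride) +
         "g" + std::to_string(groups) + "d" + std::to_string(dilation) +
         (bias ? "Bias" : "") + act;
}

int main() {
  // e.g. "3x3p1s1g1d1Biasrelu" (activation spelling is an assumption).
  std::cout << to_remark(3, 3, 1, 1, 1, 1, true, "relu") << "\n";
}
```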