Commit d1328b25 authored by Yuan Shuai, committed by GitHub

[LITE][PROFILE] Enhance ARM CPU profiler with real backend kernel name (#3674)

* [LITE][PROFILE] Enhance ARM CPU profiler with real backend kernel. test=develop
Parent 42af036d
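
In outline, the mechanism this commit wires up: each concrete ARM kernel records the name of the backend math function it actually dispatched to in a `kernel_func_name_` member, and the profiler pulls that name through a `SetProfileRuntimeKernelInfo(OpCharacter*)` hook when it builds the summary table. Dispatcher kernels such as `ConvCompute` simply forward the hook to their chosen `impl_`. Below is a minimal self-contained sketch of that contract; the classes are simplified stand-ins for illustration, not the real Paddle-Lite types.

```cpp
#include <iostream>
#include <string>

namespace profile {
// Per-op record the profiler fills in; mirrors the OpCharacter used here.
struct OpCharacter {
  std::string kernel_func_name{"unknown"};
};
}  // namespace profile

// Base kernel: the default hook reports a sentinel, matching the
// "NotImplFor..." defaults added in the headers in this diff.
class KernelBase {
 public:
  virtual void SetProfileRuntimeKernelInfo(profile::OpCharacter* ch) {
    ch->kernel_func_name = kernel_func_name_;
  }
  virtual void Run() = 0;
  virtual ~KernelBase() = default;

 protected:
  std::string kernel_func_name_{"NotImplemented"};
};

// Concrete kernel: Run() picks a backend implementation at runtime and
// records the real function name for the profiler to display.
class DepthwiseConvLike : public KernelBase {
 public:
  void Run() override {
    // ... dispatch to e.g. conv_depthwise_3x3_fp32(...) ...
    kernel_func_name_ = "conv_depthwise_3x3_fp32";
  }
};

int main() {
  DepthwiseConvLike k;
  k.Run();
  profile::OpCharacter ch;
  k.SetProfileRuntimeKernelInfo(&ch);        // profiler pulls the real name
  std::cout << ch.kernel_func_name << "\n";  // conv_depthwise_3x3_fp32
}
```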
@@ -112,7 +112,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
     if (!concise) {
       ss << " " << setw(24) << left << "KernelName";
     }
-    ss << " " << setw(16) << left << "Remark";
+    ss << " " << setw(26) << left << "Remark";
     if (!concise) {
       ss << " " << setw(15) << left << "InDim"
          << " " << setw(15) << left << "FilterDim"
@@ -185,7 +185,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
     // clang-format off
     ss << setw(20) << left << fixed << item.first.op_type
        << " " << setw(30) << left << fixed << item.first.kernel_attr
-       << " " << setw(16) << left << fixed << item.first.remark
+       << " " << setw(26) << left << fixed << item.first.remark
        << " " << setw(7) << left << fixed << setprecision(3)
        << item.second.avg
        << " " << setw(7) << left << fixed << setprecision(3)
@@ -244,7 +244,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
        << " " << setw(30) << left << fixed << unit.Character().kernel_attr
        << " " << setw(24) << left << fixed
        << unit.Character().kernel_func_name
-       << " " << setw(16) << left << fixed << unit.Character().remark
+       << " " << setw(26) << left << fixed << unit.Character().remark
        << " " << setw(15) << left << fixed << unit.Character().input_shape
        << " " << setw(15) << left << fixed << unit.Character().filter_shape
        << " " << setw(15) << left << fixed << unit.Character().output_shape
......
@@ -15,6 +15,9 @@
 #pragma once
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/kernel.h"
+#ifdef LITE_WITH_PROFILE
+#include "lite/core/profile/profiler.h"
+#endif
 namespace paddle {
 namespace lite {
@@ -36,6 +39,13 @@ class ConvCompute : public KernelLite<TARGET(kARM), Ptype> {
     impl_->Run();
   }
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    impl_->SetProfileRuntimeKernelInfo(ch);
+  }
+#endif
   ~ConvCompute() {
     if (impl_ != nullptr) {
       delete impl_;
......
@@ -50,6 +50,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
       flag_trans_weights_ = true;
     }
     impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_fp32";
+#endif
   } else if (kw == 5) {
     // VLOG(5) << "invoke 5x5 dw conv fp32";
     auto strides = param.strides;
@@ -67,6 +70,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
           w_data_in, w_data, oc, 1, cblock, kh * kw);
       flag_trans_weights_ = true;
       impl_ = lite::arm::math::conv_depthwise_5x5_fp32;
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_depthwise_5x5_fp32";
+#endif
     } else {
       LOG(FATAL)
           << "5x5 depthwise conv only support stride == 1 or stride == 2";
@@ -103,6 +109,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 3x3 dw conv int8 kernel fp32 out";
     impl_ = lite::arm::math::conv_depthwise_3x3_int8_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_int8_fp32";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -113,6 +122,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 5x5 dw conv int8 kernel fp32 out";
     impl_ = lite::arm::math::conv_depthwise_5x5_int8_fp32;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_5x5_int8_fp32";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -162,6 +174,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 3x3 dw conv int8 kernel int8 out";
     impl_ = lite::arm::math::conv_depthwise_3x3_int8_int8;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_3x3_int8_int8";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -172,6 +187,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
     // trans weights
     // VLOG(5) << "invoke 5x5 dw conv int8 kernel int8 out";
     impl_ = lite::arm::math::conv_depthwise_5x5_int8_int8;
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_depthwise_5x5_int8_int8";
+#endif
     int cround = ROUNDUP(w_dims[0], 8);
     weights_.Resize({cround / 8, 1, kh * kw, 8});
     auto wptr = param.filter->data<int8_t>();
@@ -183,6 +201,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -225,6 +251,14 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
       w_scale_.data());
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -267,6 +301,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
       w_scale_.data());
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
......
@@ -15,6 +15,7 @@
 #pragma once
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/core/context.h"
@@ -48,6 +49,15 @@ class DepthwiseConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvDw"};
+#endif
  private:
   using param_t = operators::ConvParam;
   Tensor weights_;
......
@@ -19,6 +19,14 @@ namespace lite {
 namespace kernels {
 namespace arm {
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -62,6 +70,9 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                             b_data,
                                             param,
                                             &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_fp32";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_fp32(i_data,
                                             o_data,
@@ -76,9 +87,20 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                             b_data,
                                             param,
                                             &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_fp32";
+#endif
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -117,6 +139,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_int8";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_int8(i_data,
                                             o_data,
@@ -132,9 +157,20 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_int8";
+#endif
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
@@ -173,6 +209,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s1_direct_int8";
+#endif
   } else {
     lite::arm::math::conv_3x3s2_direct_int8(i_data,
                                             o_data,
@@ -188,6 +227,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                             param,
                                             &ctx,
                                             w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_3x3s2_direct_int8";
+#endif
   }
 }
......
@@ -15,6 +15,7 @@
 #pragma once
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/context.h"
@@ -180,6 +181,15 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvDirect"};
+#endif
   /// todo, support inplace weights transform
  protected:
   Tensor weights_;
......
@@ -81,6 +81,14 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -111,12 +119,26 @@ void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   if (flag_1x1gemm_) {
     lite::arm::math::conv1x1s1_gemm(
         din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm(
         din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm";
+#endif
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -159,6 +181,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm_int8";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm_int8(din,
                                            dout,
@@ -174,9 +199,20 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm_int8";
+#endif
   }
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
   auto& param = this->Param<param_t>();
@@ -219,6 +255,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv1x1s1_gemm_int8";
+#endif
   } else {
     lite::arm::math::conv_im2col_gemm_int8(din,
                                            dout,
@@ -234,6 +273,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
                                            param,
                                            &ctx,
                                            w_scale_.data());
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_im2col_gemm_int8";
+#endif
   }
 }
......
@@ -15,6 +15,7 @@
 #pragma once
 #include <cmath>
+#include <string>
 #include <vector>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/backends/arm/math/funcs.h"
@@ -94,6 +95,15 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvGemm"};
+#endif
   /// todo, support inplace weights transform
  protected:
   using param_t = operators::ConvParam;
......
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once
+#include <string>
 #include "lite/backends/arm/math/funcs.h"
 #include "lite/core/kernel.h"
 #include "lite/operators/conv_transpose_op.h"
@@ -33,6 +34,14 @@ class Conv2DTransposeCompute
   ~Conv2DTransposeCompute() = default;
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvTranspose"};
+#endif
  protected:
   int workspace_size_{0};
 };
......
@@ -94,6 +94,14 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
   ReInitWhenNeeded();
 }
+#ifdef LITE_WITH_PROFILE
+template <>
+void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::
+    SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
+  ch->kernel_func_name = kernel_func_name_;
+}
+#endif
 template <>
 void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
   auto& param = this->Param<param_t>();
@@ -130,6 +138,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                         b_data,
                                         param,
                                         &ctx);
+#ifdef LITE_WITH_PROFILE
+    kernel_func_name_ = "conv_compute_6x6_3x3";
+#endif
   } else {
     int tile_block = 8;
     int block_count =
@@ -148,6 +159,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                           b_data,
                                           param,
                                           &ctx);
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_compute_2x2_3x3";
+#endif
     } else {
       lite::arm::math::conv_compute_2x2_3x3_small(i_data,
                                                   o_data,
@@ -162,6 +176,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
                                                   b_data,
                                                   param,
                                                   &ctx);
+#ifdef LITE_WITH_PROFILE
+      kernel_func_name_ = "conv_compute_2x2_3x3_small";
+#endif
     }
   }
 }
......
@@ -15,6 +15,7 @@
 #pragma once
 #include <cmath>
+#include <string>
 #include "lite/backends/arm/math/conv_impl.h"
 #include "lite/core/context.h"
 #include "lite/core/kernel.h"
@@ -34,6 +35,13 @@ class WinogradConv : public KernelLite<TARGET(kARM), Ptype> {
   virtual void PrepareForRun();
   virtual void ReInitWhenNeeded();
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvWino"};
+#endif
  protected:
   using param_t = operators::ConvParam;
......
@@ -22,6 +22,9 @@
 #include "lite/core/tensor.h"
 #include "lite/operators/op_params.h"
 #include "lite/utils/all.h"
+#ifdef LITE_WITH_PROFILE
+#include "lite/api/paddle_place.h"
+#endif
 namespace paddle {
 namespace lite {
@@ -44,12 +47,13 @@ class ConvOpLite : public OpLite {
     ch->input_shape = ch->DimToStr(input_dims);
     ch->output_shape = ch->DimToStr(output_dims);
     ch->filter_shape = ch->DimToStr(filter_dims);
-    ch->remark = std::to_string(filter_dims[2]) + "x" +
-                 std::to_string(filter_dims[3]) + "p" +
-                 std::to_string((*param_.paddings)[0]) + "s" +
-                 std::to_string(param_.strides[0]) + "g" +
-                 std::to_string(param_.groups) + "d" +
-                 std::to_string((*param_.dilations)[0]);
+    ch->remark =
+        std::to_string(filter_dims[2]) + "x" + std::to_string(filter_dims[3]) +
+        "p" + std::to_string((*param_.paddings)[0]) + "s" +
+        std::to_string(param_.strides[0]) + "g" +
+        std::to_string(param_.groups) + "d" +
+        std::to_string((*param_.dilations)[0]) + (param_.bias ? "Bias" : "") +
+        ActivationTypeToStr(param_.activation_param.active_type);
     // MACs = 2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group
     // GMACs = 1e-9f * MACs
    // GMACPS = 1e-6f * MACs / predict_ms
......
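
As a worked example of the MACs bookkeeping in the comment above (assuming the formulas exactly as written; the shapes and the measured latency `predict_ms` are hypothetical):

```cpp
#include <cstdio>

int main() {
  // Hypothetical 3x3 conv: batch 1, 64 input channels, 64x56x56 output.
  float kw = 3, kh = 3, batchsize = 1, group = 1;
  float in_c = 64, out_c = 64, out_h = 56, out_w = 56;
  float predict_ms = 10.f;  // assumed measured latency per inference

  float macs =
      2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group;
  float gmacs = 1e-9f * macs;                // ~0.231 GMACs
  float gmacps = 1e-6f * macs / predict_ms;  // ~23.1 GMACs per second
  std::printf("MACs=%.0f GMACs=%.3f GMACPS=%.2f\n", macs, gmacs, gmacps);
}
```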