From c67b92b149eb54252be3adb62bac3030b4879c01 Mon Sep 17 00:00:00 2001
From: HappyAngel
Date: Mon, 29 Jun 2020 21:40:30 -0500
Subject: [PATCH] [arm] update conv int8 kernel choice (#3834)

* fix conv int8 kernel choice and softmax compute bug

* change axis_size = 4 kernel choice, test=develop

* fix format. test=develop
---
 lite/backends/arm/math/softmax.cc   |  4 ++--
 lite/kernels/arm/conv_compute.cc    | 12 ++++++++++--
 lite/kernels/arm/conv_winograd.cc   |  3 +++
 lite/kernels/arm/conv_winograd.h    |  7 +++++++
 lite/kernels/arm/softmax_compute.cc |  2 +-
 5 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/lite/backends/arm/math/softmax.cc b/lite/backends/arm/math/softmax.cc
index 65d41b0491..b7f82e9f37 100644
--- a/lite/backends/arm/math/softmax.cc
+++ b/lite/backends/arm/math/softmax.cc
@@ -531,7 +531,7 @@ void softmax_inner1_large_axis(const float* din,
   }
   float32x2_t vhmax = vmax_f32(vget_high_f32(vmax), vget_low_f32(vmax));
   float max_data = std::max(vget_lane_f32(vhmax, 0), vget_lane_f32(vhmax, 1));
-  for (j = 4 * j; j < axis_size; ++j) {
+  for (j = 4 * nn; j < axis_size; ++j) {
     max_data = std::max(max_data, din_max_ptr[0]);
     din_max_ptr++;
   }
@@ -557,7 +557,7 @@ void softmax_inner1_large_axis(const float* din,

   float32x2_t vhsum = vadd_f32(vget_high_f32(vsum), vget_low_f32(vsum));
   float sum_data = vget_lane_f32(vhsum, 0) + vget_lane_f32(vhsum, 1);
-  for (j = 4 * j; j < axis_size; ++j) {
+  for (j = 4 * nn; j < axis_size; ++j) {
     dout_sum_ptr[0] = expf(din_sum_ptr[0] - max_data);
     sum_data += dout_sum_ptr[0];
     din_sum_ptr++;
diff --git a/lite/kernels/arm/conv_compute.cc b/lite/kernels/arm/conv_compute.cc
index ef174814ce..54e67de5ab 100644
--- a/lite/kernels/arm/conv_compute.cc
+++ b/lite/kernels/arm/conv_compute.cc
@@ -121,10 +121,14 @@ void ConvCompute::PrepareForRun() {
                  no_dilation && flag_dw) {
     impl_ = new DepthwiseConv;
     // VLOG(3) << "Run DepthwiseConv Int8";
+  } else if (param.groups == 1 && kw == 3 && sw == 2 && no_dilation &&
+             pads_equal) {
+    impl_ = new DirectConv;
+    // VLOG(3) << "Run DirectConv Int8";
   } else if (param.groups == 1 && kw == 3 && sw == 1 && no_dilation &&
              pads_equal) {
     impl_ = new WinogradConv;
-    // VLOG(3) << "Run DirectConv Int8";
+    // VLOG(3) << "Run WinogradConv Int8";
   } else {
     impl_ = new GemmLikeConv;
     // VLOG(3) << "Run GemmLikeConvInt8";
@@ -168,10 +172,14 @@ void ConvCompute::PrepareForRun() {
                  no_dilation && flag_dw) {
     impl_ = new DepthwiseConv;
     // VLOG(3) << "Run DepthwiseConv Int8";
+  } else if (param.groups == 1 && kw == 3 && sw == 2 && no_dilation &&
+             pads_equal) {
+    impl_ = new DirectConv;
+    // VLOG(3) << "Run DirectConv Int8";
   } else if (param.groups == 1 && kw == 3 && sw == 1 && no_dilation &&
              pads_equal) {
     impl_ = new WinogradConv;
-    // VLOG(3) << "Run DirectConv Int8";
+    // VLOG(3) << "Run WinogradConv Int8";
   } else {
     impl_ = new GemmLikeConv;
     // VLOG(3) << "Run GemmLikeConvInt8";
diff --git a/lite/kernels/arm/conv_winograd.cc b/lite/kernels/arm/conv_winograd.cc
index c6e06a243c..f61c6109cd 100644
--- a/lite/kernels/arm/conv_winograd.cc
+++ b/lite/kernels/arm/conv_winograd.cc
@@ -358,6 +358,9 @@ void WinogradConv::Run() {
         param,
         &ctx);
   }
+#ifdef LITE_WITH_PROFILE
+  kernel_func_name_ = "conv_compute_2x2_3x3_int8";
+#endif
 }
 template class WinogradConv;
 template class WinogradConv;
diff --git a/lite/kernels/arm/conv_winograd.h b/lite/kernels/arm/conv_winograd.h
index 69835a74b4..b93a719f7d 100644
--- a/lite/kernels/arm/conv_winograd.h
+++ b/lite/kernels/arm/conv_winograd.h
@@ -61,6 +61,13 @@ class WinogradConv
   virtual void PrepareForRun();
   virtual void ReInitWhenNeeded();
   virtual void Run();
+#ifdef LITE_WITH_PROFILE
+  virtual void SetProfileRuntimeKernelInfo(
+      paddle::lite::profile::OpCharacter* ch) {
+    ch->kernel_func_name = kernel_func_name_;
+  }
+  std::string kernel_func_name_{"NotImplForConvWino"};
+#endif

  protected:
   using param_t = operators::ConvParam;
diff --git a/lite/kernels/arm/softmax_compute.cc b/lite/kernels/arm/softmax_compute.cc
index 3409d0f5c5..79ea23ab3f 100644
--- a/lite/kernels/arm/softmax_compute.cc
+++ b/lite/kernels/arm/softmax_compute.cc
@@ -34,7 +34,7 @@ void SoftmaxCompute::Run() {
   int inner_num = x_dims.Slice(axis + 1, x_rank).production();
   int axis_size = x_dims[axis];
   if (inner_num == 1) {
-    if (axis_size >= 4) {
+    if (axis_size > 4) {
       lite::arm::math::softmax_inner1_large_axis(
           din, dout, outer_num, axis_size);
     } else {
-- 
GitLab
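
For readers following the softmax.cc hunks, the sketch below is a minimal standalone illustration of the blocked-reduction pattern they correct: the scalar tail loop has to start at `4 * nn`, i.e. right after the elements already covered by the 4-wide (NEON) part, which is what the `4 * j` to `4 * nn` change pins down. This is plain C++ rather than PaddleLite code; the function name `max_over_axis` and the scalar block loop are illustrative assumptions, not the library's API.

```cpp
// Standalone sketch (not PaddleLite code) of the blocked max-reduction
// pattern behind the softmax.cc fix. `nn` and `axis_size` mirror the names
// used in the patch; NEON intrinsics are replaced by plain C++ for clarity.
#include <algorithm>
#include <cstdio>

float max_over_axis(const float* din, int axis_size) {
  int nn = axis_size >> 2;  // number of full 4-element blocks
  float max_data = din[0];  // running maximum (assumes axis_size >= 1)
  // Blocked part: 4 elements per iteration (vmaxq_f32 in the real kernel).
  for (int i = 0; i < nn; ++i) {
    const float* p = din + 4 * i;
    max_data = std::max({max_data, p[0], p[1], p[2], p[3]});
  }
  // Scalar tail: must begin at 4 * nn, the first element the blocked part
  // did not touch. The patch changes the start from `4 * j` to `4 * nn` so
  // it no longer depends on where the block-loop counter ended up.
  for (int j = 4 * nn; j < axis_size; ++j) {
    max_data = std::max(max_data, din[j]);
  }
  return max_data;
}

int main() {
  float data[7] = {0.1f, 2.0f, -1.0f, 3.0f, 9.0f, 0.5f, 7.0f};
  std::printf("max = %f\n", max_over_axis(data, 7));  // expect 9.000000
  return 0;
}
```

The softmax_compute.cc hunk is the caller-side counterpart: with `axis_size > 4` the `softmax_inner1_large_axis` path is only chosen when the axis is strictly larger than one 4-element block, and `axis_size == 4` now goes to the small-axis kernel instead, matching the second bullet of the commit message.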