// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/conv_compute.h"
#include <utility>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/arm/conv_depthwise.h"
#include "lite/kernels/arm/conv_direct.h"
#include "lite/kernels/arm/conv_gemmlike.h"
#include "lite/kernels/arm/conv_winograd.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

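// Float in / float out: choose the concrete conv implementation once, on the
// first run, from the filter shape and conv attributes.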
template <>
void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();
  auto& ctx = this->ctx_->template As<ARMContext>();

  int ic = w_dims[1] * param.groups;
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int pad = param.paddings[0];
  int stride = param.strides[0];

  int chin = param.x->dims()[1];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];
  int chout = param.output->dims()[1];
  int hout = param.output->dims()[2];
  int wout = param.output->dims()[3];

  bool kps_equal = (param.paddings[0] == param.paddings[1]) &&
                   (param.strides[0] == param.strides[1]) && (kw == kh);
  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
  bool flag_dw_3x3 = (kw == 3 && kh == 3 && (stride == 1 || stride == 2));
  bool flag_dw_5x5 =
      (kw == 5 && stride == 1) || (kw == 5 && stride == 2 && pad == 2);
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;

  /// select conv impl
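  /// dispatch order: depthwise -> winograd -> direct -> gemm-like fallback;
  /// every branch except the gemm-like fallback requires a square
  /// kernel/pad/stride (kps_equal) and no dilation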
  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
    /// dw conv impl
    impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
    VLOG(3) << "invoking dw conv";
  } else if (param.groups == 1 && kw == 3 && stride == 1 && kps_equal &&
             no_dilation) {
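    // winograd is reserved for sufficiently wide layers; small channel
    // counts or small feature maps stay on the direct kernel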
    if (ic >= 32 && oc >= 32 && hout > 16 && wout > 16) {
      /// winograd conv impl
      impl_ = new WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>;
      VLOG(3) << "invoking winograd conv";
    } else {
      /// direct conv impl
      impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>;
      VLOG(3) << "invoking direct conv";
    }
  } else if (param.groups == 1 && kw == 3 && stride == 2 &&
             chin * chout < 4 * hin * win && kps_equal && no_dilation) {
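    /// stride-2 direct conv is only chosen while the channel product stays
    /// small relative to the input area; larger layers fall through to the
    /// gemm-like path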
    /// direct conv impl
    impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>;
    VLOG(3) << "invoking direct conv";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>;
    VLOG(3) << "invoking gemm like conv";
  }
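  // hand the kernel context to the chosen implementation, run its one-time
  // setup, and clear is_first_epoch_ so the selection is not repeated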
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

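// Int8 in / float out: analogous dispatch (depthwise / direct / gemm-like),
// with no winograd branch.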
template <>
void ConvCompute<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();

  int ic = param.groups * w_dims[1];
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int ph = param.paddings[1];
  int pw = param.paddings[0];
  int sh = param.strides[1];
  int sw = param.strides[0];

  bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
  bool flag_dw_3x3 = (kw == 3 && kh == 3) && (sw == 1 || sw == 2);
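  // note: the int8 depthwise 5x5 path below only covers stride 1, narrower
  // than the float kernel's 5x5 support (stride 1, or stride 2 with pad 2)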
  bool flag_dw_5x5 = (kw == 5 && sw == 1);
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;

  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
    impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>;
    VLOG(3) << "Run DepthwiseConv Int8";
  } else if (param.groups == 1 && kw == 3 && (sw == 1 || sw == 2) &&
             kps_equal && no_dilation) {
    impl_ = new DirectConv<PRECISION(kInt8), PRECISION(kFloat)>;
    VLOG(3) << "Run DirectConv Int8";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>;
    VLOG(3) << "Run GemmLikeConvInt8";
  }
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

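// Int8 in / int8 out: identical selection logic to the int8 -> float kernel;
// only the output precision of the instantiated implementation differs.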
template <>
void ConvCompute<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();

  int ic = w_dims[1] * param.groups;
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int ph = param.paddings[1];
  int pw = param.paddings[0];
  int sh = param.strides[1];
  int sw = param.strides[0];

  bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
  bool flag_dw_3x3 = (kw == 3 && kh == 3) && (sw == 1 || sw == 2);
  bool flag_dw_5x5 = (kw == 5 && sw == 1);
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;

  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
    impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>;
    VLOG(3) << "Run DepthwiseConv Int8";
  } else if (param.groups == 1 && kw == 3 && (sw == 1 || sw == 2) &&
             kps_equal && no_dilation) {
    impl_ = new DirectConv<PRECISION(kInt8), PRECISION(kInt8)>;
    VLOG(3) << "Run DirectConv Int8";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>;
    VLOG(3) << "Run GemmLikeConvInt8";
  }
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

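// Convenience aliases: ConvCompute<input precision, output precision>.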
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kFloat),
                                                PRECISION(kFloat)>
    ConvFp32;
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kInt8),
                                                PRECISION(kFloat)>
    ConvInt8_Fp32;
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kInt8),
                                                PRECISION(kInt8)>
    ConvInt8_Int8;

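// Each alias is registered for both conv2d and depthwise_conv2d; the int8
// kernels are registered twice, as int8_out and fp32_out variants.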
REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();

REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();

REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(
    depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .Finalize();