// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/conv_compute.h"
#include <utility>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/arm/conv_depthwise.h"
#include "lite/kernels/arm/conv_direct.h"
#include "lite/kernels/arm/conv_gemmlike.h"
#include "lite/kernels/arm/conv_winograd.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

template <>
void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();
  auto& ctx = this->ctx_->template As<ARMContext>();

  auto paddings = *param.paddings;
  auto dilations = *param.dilations;
  int ic = w_dims[1] * param.groups;
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int pad = paddings[0];
  int stride = param.strides[0];
  int threads = ctx.threads();

  int chin = param.x->dims()[1];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];
  int chout = param.output->dims()[1];
  int hout = param.output->dims()[2];
  int wout = param.output->dims()[3];

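  // Shape/attribute flags consumed by the kernel selection below.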
  bool pads_equal =
      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);

  bool ks_equal = (param.strides[0] == param.strides[1]) && (kw == kh);
  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);

  bool flag_dw_3x3 = (kw == 3) && (kh == 3) && (stride == 1 || stride == 2);
  bool flag_dw_5x5 = (kw == 5) && (kh == 5) && (stride == 1 || stride == 2);

#ifndef __aarch64__
  // armv7 only: avoid the 3x3 depthwise kernel when stride == 1 and any
  // padding exceeds 1.
  bool flag = !(stride == 1 && (paddings[0] > 1 || paddings[2] > 1));
  flag_dw_3x3 = flag_dw_3x3 && flag;
#endif
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;

  /// select conv impl
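  // Heuristic order: depthwise when groups == input channels == output
  // channels; Winograd for single-group 3x3 stride-1; direct conv for
  // single-group 3x3 stride-2 when chin * chout < 4 * hin * win; otherwise
  // the GEMM-like fallback.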
  if (param.groups == ic && ic == oc && ks_equal && no_dilation && flag_dw) {
    impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
    // VLOG(3) << "invoking dw conv";
  } else if (param.groups == 1 && kw == 3 && stride == 1 && ks_equal &&
             no_dilation) {
    impl_ = new WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>;
    // VLOG(3) << "invoking winograd conv";
  } else if (param.groups == 1 && kw == 3 && stride == 2 &&
             chin * chout < 4 * hin * win && ks_equal && no_dilation) {
    impl_ = new DirectConv<PRECISION(kFloat), PRECISION(kFloat)>;
    // VLOG(3) << "invoking direct conv";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>;
    // VLOG(3) << "invoking gemm like conv";
  }
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

template <>
void ConvCompute<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();

  auto paddings = *param.paddings;
  auto dilations = *param.dilations;
  bool pads_equal =
      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
  int ic = param.groups * w_dims[1];
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int ph = paddings[0];
  int pw = paddings[2];
  int sh = param.strides[1];
  int sw = param.strides[0];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];
  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);

  bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
  bool flag_dw_3x3 = (kw == 3 && kh == 3 && (sw == 1 || sw == 2));
  bool flag_dw_5x5 = pads_all_equal && (kw == 5 && (sw == 1 || sw == 2));
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
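  // Heuristic order: depthwise, then 3x3 stride-2 direct conv, then 3x3
  // stride-1 Winograd, with the GEMM-like kernel as the fallback.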
  if (param.groups == ic && ic == oc && kps_equal && pads_equal &&
      no_dilation && flag_dw) {
    impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>;
    // VLOG(3) << "Run DepthwiseConv Int8";
  } else if (param.groups == 1 && kw == 3 && sw == 2 && no_dilation &&
             pads_equal) {
    impl_ = new DirectConv<PRECISION(kInt8), PRECISION(kFloat)>;
    // VLOG(3) << "Run DirectConv Int8";
  } else if (param.groups == 1 && kw == 3 && sw == 1 && no_dilation &&
             pads_equal) {
    impl_ = new WinogradConv<PRECISION(kInt8), PRECISION(kFloat)>;
    // VLOG(3) << "Run WinogradConv Int8";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>;
    // VLOG(3) << "Run GemmLikeConvInt8";
  }
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

template <>
void ConvCompute<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
  auto& param = this->Param<param_t>();
  auto w_dims = param.filter->dims();

  auto& ctx = this->ctx_->template As<ARMContext>();
  auto paddings = *param.paddings;
  auto dilations = *param.dilations;
  bool pads_equal =
      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));

  int ic = w_dims[1] * param.groups;
  int oc = w_dims[0];
  int kh = w_dims[2];  // oihw
  int kw = w_dims[3];
  int ph = paddings[0];
  int pw = paddings[2];
  int sh = param.strides[1];
  int sw = param.strides[0];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];
  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);

  bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
  bool flag_dw_3x3 = (kw == 3 && kh == 3 && (sw == 1 || sw == 2));
  bool flag_dw_5x5 = pads_all_equal && (kw == 5 && (sw == 1 || sw == 2));
  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;

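  // Same heuristic order as the int8-to-fp32 path above.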
  if (param.groups == ic && ic == oc && kps_equal && pads_equal &&
      no_dilation && flag_dw) {
    impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>;
    // VLOG(3) << "Run DepthwiseConv Int8";
  } else if (param.groups == 1 && kw == 3 && sw == 2 && no_dilation &&
             pads_equal) {
    impl_ = new DirectConv<PRECISION(kInt8), PRECISION(kInt8)>;
    // VLOG(3) << "Run DirectConv Int8";
  } else if (param.groups == 1 && kw == 3 && sw == 1 && no_dilation &&
             pads_equal) {
    impl_ = new WinogradConv<PRECISION(kInt8), PRECISION(kInt8)>;
    // VLOG(3) << "Run WinogradConv Int8";
  } else {
    impl_ = new GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>;
    // VLOG(3) << "Run GemmLikeConvInt8";
  }
  impl_->SetContext(std::move(this->ctx_));
  impl_->SetParam(param);
  impl_->PrepareForRun();
  is_first_epoch_ = false;
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

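// Convenience typedefs for the three precision variants; each is registered
// below for conv2d and depthwise_conv2d, with the int8 kernels exposed in
// both int8_out and fp32_out flavors.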
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kFloat),
                                                PRECISION(kFloat)>
    ConvFp32;
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kInt8),
                                                PRECISION(kFloat)>
    ConvInt8_Fp32;
typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kInt8),
                                                PRECISION(kInt8)>
    ConvInt8_Int8;

REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();

REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();

REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(
    depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(
    depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .Finalize();