/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true,
                    "Input(Input) of ConvOp should not be null.");
  PADDLE_ENFORCE_EQ(ctx->HasInput("Filter"), true,
                    "Input(Filter) of ConvOp should not be null.");
  PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
                    "Output(Output) of ConvOp should not be null.");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");
  const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      "ShapeError: the input of Op(conv) should be a 4-D or 5-D Tensor. But "
      "received: %u-D Tensor, the shape of input is [%s].",
      in_dims.size(), in_dims);

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      "ShapeError: the input's dimension size and filter's dimension size of "
      "Op(conv) should be equal. But received: the shape of input is [%s], "
      "the dimension size of input is [%d], the shape of filter is [%s], "
      "the dimension size of filter is [%d].",
      in_dims, in_dims.size(), filter_dims, filter_dims.size());

  int in_sub_stride_size = in_dims.size() - strides.size();
  PADDLE_ENFORCE_EQ(in_dims.size() - strides.size() == 2U, true,
                    "ShapeError: the dimension size of input minus the size of "
                    "Attr(stride) must be equal to 2 for Op(conv). "
                    "But received: the dimension size of input minus the size "
                    "of Attr(stride) is [%d], the "
                    "input's dimension size is [%d], the shape of input "
                    "is [%s], the Attr(stride)'s size is [%d].",
                    in_sub_stride_size, in_dims.size(), in_dims,
                    strides.size());

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      "ShapeError: The number of input channels should be equal to filter "
      "channels * groups for Op(conv). But received: the input's channels is "
      "[%d], the shape "
      "of input is [%s], the filter's channel is [%d], the shape of filter is "
      "[%s], the groups is [%d], the data_format is %s. The error may come "
      "from wrong data_format setting.",
      input_channels, in_dims, filter_dims[1], filter_dims, groups,
      data_format);
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      "ShapeError: The number of output channels of Op(conv) should be "
      "divisible by groups. "
      "But received: the output channels is [%d], the shape of filter is [%s] "
      "(the first dimension of filter is output channel), the groups is [%d].",
      filter_dims[0], filter_dims, groups);

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }
  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());
  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
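  // The spatial output sizes pushed below follow the convolution formula from
  // the operator doc (AddComment):
  //   out = (in + pad_begin + pad_end - (dilation * (ksize - 1) + 1)) / stride + 1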
  for (size_t i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(ConvOutputSize(in_data_dims[i], filter_dims[i + 2],
                                            dilations[i], paddings[2 * i],
                                            paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
  ctx->ShareLoD("Input", "Output");
}

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // todo enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

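  // Kernel dispatch order: prefer cuDNN when it can be used, otherwise fall
  // back to MKL-DNN (which also switches the layout and, for int8/uint8
  // inputs, the customized kernel type), and finally to the plain kernel.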
#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
                      "input and filter data type should be consistent");
  }
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
                      "float16 can only be used when CUDNN is used");
  }

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
#ifdef PADDLE_WITH_CUDA
  std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
  // TODO(dangqingqing): Currently conv_fusion_op use cudnn but sets use_cudnn
  // to false. It should be fixed and then here should only create if library
  // is kCUDNN.
  if (configs.empty()) {
    std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>> p(
        new framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>());
    configs.push_back(p);
  }
#endif
  return type;
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application. "
           "The format of output tensor is X (one-dimensional) of size equal "
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added. "
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_width_right) of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need to install cudnn")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("use_quantizer",
                "(bool, default false) "
                "Set to true for operators that should be quantized and use "
                "int8 kernel. "
                "Only used on CPU.")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever the convolution output is used as an input to a "
                "residual connection.")
      .SetDefault(false);
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f});
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCHW) An optional string from: \"NHWC\", \"NCHW\". "
      "Defaults to \"NCHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need to set use_cudnn to true. "
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false);

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) have two elements. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
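
For example, with a 3 x 3 filter, paddings of 1, strides of 1 and dilations of 1,
an input feature map with $H_{in} = W_{in} = 32$ gives
$H_{out} = W_{out} = (32 + 1 + 1 - (1 * (3 - 1) + 1)) / 1 + 1 = 32$.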
)DOC");
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC, where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added. "
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need to install cudnn")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever the convolution output is used as an input to a "
                "residual connection.")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCDHW) An optional string from: \"NDHWC\", \"NCDHW\". "
      "Defaults to \"NCDHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false);
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) have three elements. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
)DOC");
  Apply();
}

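// The grad op only propagates shapes: grad(Input) reuses the shape of Input
// and grad(Filter) reuses the shape of Filter.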
void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
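  // When the cuDNN library was selected above, one AlgorithmsCache entry is
  // created up front for the backward-data algorithm and one for the
  // backward-filter algorithm (presumably so that chosen algorithms can be
  // reused across runs of this op).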
#ifdef PADDLE_WITH_CUDA
  if (library_ == framework::LibraryType::kCUDNN) {
    std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
    if (configs.empty()) {
      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>
          p(new framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>());
      configs.push_back(p);

      std::shared_ptr<
          framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>
          p2(new framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>());
      configs.push_back(p2);
    }
  }
#endif
  return type;
}

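// Conv2DGradMaker builds the corresponding *_grad op: the forward inputs
// (Input, Filter, Bias) and the gradient of Output become inputs of the grad
// op, and the gradients of Input, Filter and Bias become its outputs.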
template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  std::unique_ptr<T> Apply() const override {
    auto* op = new T();
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("Bias", this->Input("Bias"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    op->SetAttrMap(this->Attrs());

    return std::unique_ptr<T>(op);
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  std::unique_ptr<T> Apply() const override {
    auto* op = new T();
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());

    return std::unique_ptr<T>(op);
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  std::unique_ptr<T> Apply() const override {
    auto* op = new T();
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->Empty()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter",
                  ddx.empty() ? this->Empty() : this->InputGrad("Filter"));
    op->SetOutput("DInput",
                  ddw.empty() ? this->Empty() : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());

    return std::unique_ptr<T>(op);
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  std::unique_ptr<T> Apply() const override {
    auto* op = new T();
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->Empty()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter",
                  ddx.empty() ? this->Empty() : this->InputGrad("Filter"));
    op->SetOutput("DInput",
                  ddw.empty() ? this->Empty() : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());

    return std::unique_ptr<T>(op);
  }
};

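// Double-grad output shapes mirror the tensors they pair with: DDOutput takes
// the shape of DOutput, DFilter that of Filter, and DInput that of Input.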
void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
#ifdef PADDLE_WITH_CUDA
  if (library_ == framework::LibraryType::kCUDNN) {
    std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
    if (configs.empty()) {
      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>> p0(
          new framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>());
      configs.push_back(p0);

      std::shared_ptr<
          framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>
          p1(new framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>());
      configs.push_back(p1);

      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>
          p2(new framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>());
      configs.push_back(p2);
    }
  }
#endif
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
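// All conv operators below (conv2d, depthwise_conv2d, conv3d and their grad
// variants) are registered on the shared ConvOp / ConvOpGrad /
// ConvOpDoubleGrad classes defined above.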
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);