/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"
Y
Update  
Yi Wang 已提交
16

17
#include <memory>
Y
Update  
Yi Wang 已提交
18 19 20
#include <string>
#include <vector>

21 22
#include "paddle/fluid/framework/op_version_registry.h"

23
#ifdef PADDLE_WITH_CUDA
24
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
25 26 27 28 29
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
30
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
C
chengduoZH 已提交
31 32 33 34

namespace paddle {
namespace operators {

35 36
// Validates the shapes of "Input" and "Filter" against the convolution
// attributes and returns the shape of "Output".
//
// The result is [batch, out_channels, spatial...] by default, and
// [batch, spatial..., out_channels] when data_format is "NHWC"/"NDHWC" and
// the op is not of MKL-DNN type. Outside of runtime, any spatial dimension
// whose input or filter extent is not positive is reported as -1.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_EQ) when the ranks, the
// strides length, the channel counts or the groups value are inconsistent.
std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN Kernels are using NCHW order of dims description
  // so we ignore data_format consideration for MKL-DNN kernel
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %d, input's shape is [%s].",
          in_dims.size(), in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s],  "
          "the filter's dimension is %d.",
          in_dims, in_dims.size(), filter_dims, filter_dims.size()));

  // Computed in signed arithmetic so that a too-long strides attribute is
  // reported as a negative difference instead of a wrapped unsigned value.
  const int in_sub_stride_size =
      static_cast<int>(in_dims.size()) - static_cast<int>(strides.size());
  PADDLE_ENFORCE_EQ(
      in_dims.size(), strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be equal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimension and Attr(strides)'s length = %d.",
          in_dims.size(), in_dims, strides.size(),
          framework::make_ddim(strides), in_sub_stride_size));

  // groups is used as a divisor below; reject non-positive values up front
  // so that the modulo can never be evaluated with a zero divisor.
  PADDLE_ENFORCE_EQ(
      groups > 0, true,
      platform::errors::InvalidArgument(
          "The groups of Op(Conv) should be greater than 0. But received: "
          "the groups is %d.",
          groups));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels, in_dims, filter_dims[1], filter_dims, groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divided by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0], filter_dims, groups));

  // Spatial part of the input: everything except batch and channel.
  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  // Spatial part of the filter: everything after the two channel dims.
  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_data_dims[i] <= 0)) {
      // Extent is not known yet; propagate "unknown" to the output.
      output_shape.push_back(-1);
    } else {
      // paddings holds a (before, after) pair per spatial dimension.
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}

141 142
// Picks the kernel type for the forward convolution.
//
// Starts from the plain library with "AnyLayout", switches to cuDNN when it
// can be used, and otherwise to MKL-DNN when that can be used (in which case
// the customized type value distinguishes INT8 from FP32 kernels). Also
// enforces that Input and Filter share a data type (except for INT8, UINT8
// and BF16 inputs) and that FP16 inputs run on cuDNN.
framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int custom_value = framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType lib{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto in_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  // todo enable data layout when it's ready
  std::string layout_name = "AnyLayout";
  framework::DataLayout data_layout =
      framework::StringToDataLayout(layout_name);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    lib = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (lib == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    lib = framework::LibraryType::kMKLDNN;
    data_layout = framework::DataLayout::kMKLDNN;
    const bool is_int8_input =
        in_dtype == framework::DataTypeTrait<int8_t>::DataType() ||
        in_dtype == framework::DataTypeTrait<uint8_t>::DataType();
    if (is_int8_input) {
      custom_value = kConvMKLDNNINT8;
    } else {
      custom_value = kConvMKLDNNFP32;
    }
  }
#endif

  // Quantized and bfloat16 inputs are allowed to differ from the filter type.
  const bool skip_filter_type_check =
      in_dtype == framework::proto::VarType::INT8 ||
      in_dtype == framework::proto::VarType::UINT8 ||
      in_dtype == framework::proto::VarType::BF16;
  if (!skip_filter_type_check) {
    auto filter_dtype = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(in_dtype, filter_dtype,
                      platform::errors::InvalidArgument(
                          "input and filter data type should be consistent"));
  }
  if (in_dtype == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(lib, framework::LibraryType::kCUDNN,
                      platform::errors::InvalidArgument(
                          "float16 can only be used when CUDNN is used"));
  }

  return framework::OpKernelType(in_dtype, ctx.GetPlace(), data_layout, lib,
                                 custom_value);
}

189 190 191 192 193 194 195 196 197 198 199 200 201
// Returns the kernel type a given input variable should be treated as having.
//
// With MKL-DNN enabled, an "Input" tensor that is not yet in MKL-DNN layout
// but is headed for an MKL-DNN kernel is described with the layout named by
// the data_format attribute. Every other case keeps the tensor's own layout.
framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  const bool is_conv_input = (var_name == "Input");
  const bool kernel_is_mkldnn =
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN);
  const bool tensor_is_mkldnn =
      (tensor.layout() == framework::DataLayout::kMKLDNN);
  if (is_conv_input && kernel_is_mkldnn && !tensor_is_mkldnn) {
    const auto& attr_map = Attrs();
    paddle::framework::AttrReader reader(attr_map);
    const std::string format_name = reader.Get<std::string>("data_format");
    const auto requested_layout = framework::StringToDataLayout(format_name);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (requested_layout != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), requested_layout);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

Y
Yu Yang 已提交
214
void Conv2DOpMaker::Make() {
215 216 217 218
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
L
liym27 已提交
219 220 221 222 223 224
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
C
chengduoZH 已提交
225
  AddInput("Filter",
C
fix doc  
chengduoZH 已提交
226
           "(Tensor) The filter tensor of convolution operator. "
C
chengduoZH 已提交
227 228
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
C
fix doc  
chengduoZH 已提交
229 230
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
C
chengduoZH 已提交
231
           "input image channels divided by the groups.");
232 233 234 235 236
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application."
           "The format of output tensor is X (one-dimensional) of size equal"
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable();
237 238 239
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
240
           "Used with fuse_residual_connection fusion.")
241
      .AsDispensable();
Y
Yihua Xu 已提交
242 243
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
L
liym27 已提交
244
            "It has same data fromat and data type as the Input.");
C
chengduoZH 已提交
245 246 247 248
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
C
chengduoZH 已提交
249
      .SetDefault({1, 1});
C
chengduoZH 已提交
250 251
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
L
liym27 已提交
252 253
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_wifth_right)  of "
C
chengduoZH 已提交
254
                            "convolution operator.")
C
chengduoZH 已提交
255
      .SetDefault({0, 0});
L
liym27 已提交
256 257 258 259 260 261
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
C
chengduoZH 已提交
262 263
  AddAttr<int>(
      "groups",
C
chengduoZH 已提交
264
      "(int default:1), the groups number of the convolution operator. "
C
fix doc  
chengduoZH 已提交
265 266 267 268
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
C
chengduoZH 已提交
269
      .SetDefault(1);
C
chengduoZH 已提交
270
  AddAttr<std::vector<int>>("dilations",
C
chengduoZH 已提交
271 272
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
C
chengduoZH 已提交
273
                            "convolution operator.")
C
chengduoZH 已提交
274
      .SetDefault({1, 1});
275 276 277 278
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
279 280 281
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false);
282 283 284
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
285 286 287 288
  AddAttr<bool>(
      "use_quantizer",
      "(bool, default false) "
      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
289
      .SetDefault(false);
290 291 292 293 294
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
M
Michal Gallus 已提交
295 296
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
297 298 299 300 301 302
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default false 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f);
303 304 305 306 307 308 309 310
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
311 312 313 314 315
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) If use addto strategy or not, only used in "
      "cudnn kernel")
      .SetDefault(false);
316
  AddAttr<bool>("fuse_residual_connection",
317
                "(bool, default false) Only used in mkldnn kernel. Used "
318 319
                "whenever convolution output is as an input to residual "
                "connection.")
320
      .SetDefault(false);
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f});
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false);
341 342 343 344 345 346
  AddAttr<std::string>(
      "data_format",
      "(string, default NCHW) Only used in "
      "An optional string from: \"NHWC\", \"NCHW\". "
      "Defaults to \"NHWC\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
L
liym27 已提交
347
      .SetDefault("NCHW");
348 349 350 351 352 353 354 355
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need set use_cudnn to true."
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
356
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
357 358
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithm to calculation "
C
chengduo 已提交
359
                "convolution, whether enable exhaustive search "
翟飞跃 已提交
360
                "for cuDNN convolution or not, default is False.")
361
      .SetDefault(false);
L
liym27 已提交
362

C
chengduoZH 已提交
363
  AddComment(R"DOC(
C
fix doc  
chengduoZH 已提交
364 365
Convolution Operator.

C
chengduoZH 已提交
366
The convolution operation calculates the output based on the input, filter
C
chengduoZH 已提交
367
and strides, paddings, dilations, groups parameters. The size of each dimension of the
C
chengduoZH 已提交
368
parameters is checked in the infer-shape.
L
liym27 已提交
369
Input(Input) and Output(Output) are in NCHW or NHWC format. Where N is batch
C
fix doc  
chengduoZH 已提交
370
size, C is the number of channels, H is the height of the feature, and W is
C
chengduoZH 已提交
371
the width of the feature.
372
Filters(Input) is MCHW format format. Where M is the number of output image channels, C is
C
chengduoZH 已提交
373 374 375 376
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) are two elements. These two elements represent
height and width, respectively.
C
chengduoZH 已提交
377 378 379 380
The input(X) size and output(Out) size may be different.

Example:
  Input:
C
chengduoZH 已提交
381 382
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
C
chengduoZH 已提交
383
  Output:
C
chengduoZH 已提交
384 385 386
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
L
liym27 已提交
387 388
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
C
chengduoZH 已提交
389
$$
C
chengduoZH 已提交
390
)DOC");
Q
qingqing01 已提交
391
  Apply();
C
chengduoZH 已提交
392 393
}

Y
Yu Yang 已提交
394
void Conv3DOpMaker::Make() {
395 396 397 398
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
C
chengduoZH 已提交
399 400
  AddInput(
      "Input",
C
fix doc  
chengduoZH 已提交
401
      "(Tensor) The input tensor of convolution operator. "
L
liym27 已提交
402 403
      "The format of input tensor is NCDHW or NDHWC. Where N is batch size, C "
      "is the "
C
fix doc  
chengduoZH 已提交
404 405 406
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
C
chengduoZH 已提交
407
  AddInput("Filter",
C
fix doc  
chengduoZH 已提交
408
           "(Tensor) The filter tensor of convolution operator. "
C
chengduoZH 已提交
409 410
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
C
fix doc  
chengduoZH 已提交
411 412 413
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
C
chengduoZH 已提交
414
           "input image channels divided by the groups.");
415 416 417 418 419
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
Y
Yihua Xu 已提交
420 421
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
L
liym27 已提交
422
            "It has same data fromat and data type as the Input.");
C
chengduoZH 已提交
423 424 425 426
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
C
chengduoZH 已提交
427
      .SetDefault({1, 1, 1});
L
liym27 已提交
428 429 430 431 432 433
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
C
chengduoZH 已提交
434
      .SetDefault({0, 0, 0});
L
liym27 已提交
435 436 437 438 439 440
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
C
chengduoZH 已提交
441 442
  AddAttr<int>(
      "groups",
C
chengduoZH 已提交
443
      "(int default:1), the groups number of the convolution operator. "
C
fix doc  
chengduoZH 已提交
444 445 446 447
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
C
chengduoZH 已提交
448
      .SetDefault(1);
C
chengduoZH 已提交
449
  AddAttr<std::vector<int>>("dilations",
C
chengduoZH 已提交
450 451
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
C
chengduoZH 已提交
452
                            "convolution operator.")
C
chengduoZH 已提交
453
      .SetDefault({1, 1, 1});
454 455 456 457
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
458 459 460
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
461 462 463 464 465
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
466 467
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
468 469 470 471 472 473 474 475
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
476 477 478 479 480
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) If use addto strategy or not, only used in "
      "cudnn kernel")
      .SetDefault(false);
481 482 483 484 485
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false);
486 487
  AddAttr<std::string>(
      "data_format",
L
liym27 已提交
488 489 490
      "(string, default NCDHW) Only used in "
      "An optional string from: \"NDHWC\", \"NCDHW\". "
      "Defaults to \"NDHWC\". Specify the data format of the output data, "
491
      "the input will be transformed automatically. ")
L
liym27 已提交
492
      .SetDefault("NCDHW");
493 494 495
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
496 497 498 499 500 501 502
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
503
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
504 505
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithm to calculation "
C
chengduo 已提交
506
                "convolution, whether enable exhaustive search "
翟飞跃 已提交
507
                "for cuDNN convolution or not, default is False.")
508
      .SetDefault(false);
C
chengduoZH 已提交
509
  AddComment(R"DOC(
C
fix doc  
chengduoZH 已提交
510 511
Convolution3D Operator.

C
chengduoZH 已提交
512
The convolution operation calculates the output based on the input, filter
C
chengduoZH 已提交
513
and strides, paddings, dilations, groups parameters. The size of each dimension of the
C
chengduoZH 已提交
514
parameters is checked in the infer-shape.
L
liym27 已提交
515
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
C
fix doc  
chengduoZH 已提交
516
size, C is the number of channels,D is the depth of the feature, H is the height of
C
chengduoZH 已提交
517 518 519 520 521 522
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) are three elements. These three elements
represent depth, height and width, respectively.
C
fix doc  
chengduoZH 已提交
523 524 525 526
The input(X) size and output(Out) size may be different.

Example:
  Input:
C
chengduoZH 已提交
527 528
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
C
fix doc  
chengduoZH 已提交
529
  Output:
C
chengduoZH 已提交
530 531 532
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
L
liym27 已提交
533 534 535
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
C
chengduoZH 已提交
536
  $$
C
chengduoZH 已提交
537
)DOC");
Q
qingqing01 已提交
538
  Apply();
C
chengduoZH 已提交
539 540
}

C
chengduoZH 已提交
541 542 543 544 545 546 547 548 549 550 551
// Shape inference for the convolution gradient op: each gradient output that
// is present takes the shape of the forward variable it corresponds to.
void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  const auto input_shape = ctx->GetInputDim("Input");
  const auto filter_shape = ctx->GetInputDim("Filter");

  const auto input_grad = framework::GradVarName("Input");
  if (ctx->HasOutput(input_grad)) {
    ctx->SetOutputDim(input_grad, input_shape);
  }

  const auto filter_grad = framework::GradVarName("Filter");
  if (ctx->HasOutput(filter_grad)) {
    ctx->SetOutputDim(filter_grad, filter_shape);
  }
}

552 553
// Picks the kernel type for the convolution gradient op.
//
// Starts from the plain library with "AnyLayout", switches to cuDNN when it
// can be used, and otherwise to MKL-DNN (FP32 customized type) when that can
// be used. The data type is taken from the "Input" variable.
framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  // MKL-DNN is only considered when cuDNN was not selected above.
  if (library == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  return framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout, library, customized_type_value);
}

582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
// Returns the kernel type a given variable of the gradient op should be
// treated as having.
//
// With MKL-DNN enabled, "Input" and the gradient of "Output" that are not yet
// in MKL-DNN layout but are headed for an MKL-DNN kernel are described with
// the layout named by the data_format attribute. Every other case keeps the
// tensor's own layout.
framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  const bool is_data_var = (var_name == "Input") ||
                           (var_name == framework::GradVarName("Output"));
  const bool kernel_is_mkldnn =
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN);
  const bool tensor_is_mkldnn =
      (tensor.layout() == framework::DataLayout::kMKLDNN);
  if (is_data_var && kernel_is_mkldnn && !tensor_is_mkldnn) {
    const auto& attr_map = Attrs();
    paddle::framework::AttrReader reader(attr_map);
    const std::string format_name = reader.Get<std::string>("data_format");
    const auto requested_layout = framework::StringToDataLayout(format_name);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (requested_layout != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), requested_layout);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

H
hong 已提交
608 609
// Builds the description of the gradient op for a 2-D convolution.
// The grad op is named "<forward type>_grad", receives Input, Filter, Bias
// and the gradient of Output, produces the gradients of Input, Filter and
// Bias, and inherits every attribute of the forward op.
template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");

    // Forward inputs are passed through unchanged.
    for (const char* forward_input : {"Input", "Filter", "Bias"}) {
      op->SetInput(forward_input, this->Input(forward_input));
    }
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    // One gradient output per forward input.
    for (const char* forward_input : {"Input", "Filter", "Bias"}) {
      op->SetOutput(framework::GradVarName(forward_input),
                    this->InputGrad(forward_input));
    }

    op->SetAttrMap(this->Attrs());
  }
};

H
hong 已提交
627 628
/*
 * Describes the backward op of a 3-D convolution op.
 *
 * The generated op type is "<forward op type>_grad".
 * Inputs:  Input, Filter, Output@GRAD, and ResidualData when the forward op
 *          has that input
 * Outputs: Input@GRAD, Filter@GRAD
 * No Bias input or Bias gradient is wired up here. The forward op's attribute
 * map is copied to the grad op unchanged.
 */
template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  // Fills `op` with the type, inputs, outputs and attributes of the grad op.
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    // ResidualData is forwarded only when the forward op actually has it.
    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

Q
qingqing01 已提交
649 650 651 652
/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
H
hong 已提交
653 654
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
Q
qingqing01 已提交
655
 public:
H
hong 已提交
656
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
Q
qingqing01 已提交
657

658
  void Apply(GradOpPtr<T> op) const override {
Q
qingqing01 已提交
659 660
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
H
hong 已提交
661 662 663 664 665 666
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));
Q
qingqing01 已提交
667 668 669 670

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
H
hong 已提交
671 672
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));
673

L
lvmengsi 已提交
674
    op->SetOutput("DDOutput",
H
hong 已提交
675
                  ddx.empty()
676
                      ? this->EmptyInputGrad()
H
hong 已提交
677
                      : this->InputGrad(framework::GradVarName("Output")));
678 679 680 681
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));
682

H
hong 已提交
683
    op->SetAttrMap(this->Attrs());
Q
qingqing01 已提交
684 685 686
  }
};

L
lvmengsi 已提交
687 688 689 690
/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
H
hong 已提交
691 692
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
L
lvmengsi 已提交
693
 public:
H
hong 已提交
694
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
L
lvmengsi 已提交
695

696
  void Apply(GradOpPtr<T> op) const override {
L
lvmengsi 已提交
697 698
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
H
hong 已提交
699 700 701 702 703 704
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));
L
lvmengsi 已提交
705

H
hong 已提交
706 707
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));
L
lvmengsi 已提交
708

L
lvmengsi 已提交
709
    op->SetOutput("DDOutput",
H
hong 已提交
710
                  ddx.empty()
711
                      ? this->EmptyInputGrad()
H
hong 已提交
712
                      : this->InputGrad(framework::GradVarName("Output")));
713 714 715 716
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));
L
lvmengsi 已提交
717

H
hong 已提交
718
    op->SetAttrMap(this->Attrs());
L
lvmengsi 已提交
719 720 721
  }
};

Q
qingqing01 已提交
722 723 724 725 726
// Sets the output dims of the conv double-grad op.
//
// Each output simply copies the dims of an existing input:
//   DDOutput <- DOutput  (when DDInput or DDFilter is present)
//   DFilter  <- Filter   (when DDInput is present)
//   DInput   <- Input    (when DDFilter is present)
// An output that is absent, or whose gating input is absent, is left untouched.
void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

// Selects the kernel type for the conv double-grad op.
//
// The data type is taken from the "Input" variable and the place from the
// execution context. The layout is whatever "AnyLayout" parses to. The library
// is kPlain unless this is a CUDA build and platform::CanCUDNNBeUsed(ctx)
// holds, in which case it is kCUDNN.
framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  // Note: library_ and layout_ are locals despite the trailing underscore.
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

C
chengduoZH 已提交
758 759 760 761
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// conv2d: forward op, its grad op (which carries the double-grad maker), and
// the double-grad op.
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
// Reuses the conv2d op classes and grad maker; its grad op is registered
// without a double-grad maker.
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);

// conv3d: forward op, its grad op (which carries the double-grad maker), and
// the double-grad op.
REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);
C
chengduoZH 已提交
786

787 788
// CPU kernels. Every op below is registered for float and double on
// CPUDeviceContext.

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

// conv2d forward, grad and double-grad kernels
REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

// conv3d forward, grad and double-grad kernels
REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);
822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854

// Op version checkpoints. Each of conv2d, depthwise_conv2d and conv3d records
// one checkpoint noting the new attribute "use_addto".
// NOTE(review): the trailing `false` passed to NewAttr is presumably the
// attribute's default value -- confirm against OpVersionDesc::NewAttr.
REGISTER_OP_VERSION(conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

// Same checkpoint for depthwise_conv2d.
REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

// Same checkpoint for conv3d.
REGISTER_OP_VERSION(conv3d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));