/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/miopen_helper.h"
#endif

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN kernels describe dims in NCHW order,
  // so we ignore data_format for the MKL-DNN kernel.
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %u, input's shape is [%s].",
          in_dims.size(), in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s],  "
          "the filter's dimension is %d.",
          in_dims, in_dims.size(), filter_dims, filter_dims.size()));

  int stride_size = strides.size();
  for (int i = 0; i < stride_size; ++i) {
    PADDLE_ENFORCE_GT(
        strides[i], 0,
        platform::errors::InvalidArgument(
            "The stride of Op(Conv) should be larget than 0, but received "
            "stride is %d.",
            strides[i]));
  }

  int in_sub_stride_size = in_dims.size() - stride_size;
  PADDLE_ENFORCE_EQ(
      in_dims.size(), strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be euqal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimention and Attr(strides)'s length = %u.",
          in_dims.size(), in_dims, strides.size(),
          framework::make_ddim(strides), in_sub_stride_size));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels, in_dims, filter_dims[1], filter_dims, groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divided by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0], filter_dims, groups));

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
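  // Each spatial output dimension below follows the standard convolution
  // arithmetic, matching the formula in the op documentation:
  //   out = (in + pad_begin + pad_end - (dilation * (ksize - 1) + 1)) / stride + 1
  // Illustrative example (assumed values, not from this file): in = 5,
  // ksize = 3, dilation = 1, paddings = {1, 1}, stride = 2 gives
  // (5 + 1 + 1 - 3) / 2 + 1 = 3.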
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // todo enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

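  // Kernel library selection below is by priority: cuDNN when it can be
  // used, then MKL-DNN as a fallback, otherwise the plain native kernel.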
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, input_data_type)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8 &&
      input_data_type != framework::proto::VarType::BF16) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(
        input_data_type, filter_data_type,
        platform::errors::InvalidArgument(
            "input and filter data type should be consistent, "
            "but received input data type is %s and filter type "
            "is %s",
            paddle::framework::DataTypeToString(input_data_type),
            paddle::framework::DataTypeToString(filter_data_type)));
  }
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
                      platform::errors::InvalidArgument(
                          "float16 can only be used when CUDNN is used"));
  }
#ifdef PADDLE_WITH_CUDA
  if (input_data_type == framework::proto::VarType::BF16 &&
      library == framework::LibraryType::kCUDNN) {
    PADDLE_ENFORCE_GE(
        platform::CudnnVersion(), 8100,
        platform::errors::InvalidArgument(
            "bfloat16 can only be used when CUDNN_VERSION >= 8100"));
  }
#endif  // PADDLE_WITH_CUDA

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
  return type;
}

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; weights and
  // bias already have their shapes in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application."
           "The format of output tensor is X (one-dimensional) of size equal"
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has same data fromat and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_wifth_right)  of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>(
      "use_quantizer",
      "(bool, default false) "
      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
      .SetDefault(false);
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default false 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) If use addto strategy or not, only used in "
      "cudnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false);
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f});
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCHW) Only used in "
      "An optional string from: \"NHWC\", \"NCHW\". "
      "Defaults to \"NHWC\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need set use_cudnn to true."
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution, whether to enable exhaustive search "
                "for the cuDNN convolution algorithm or not. Default is False.")
      .SetDefault(false);

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and the strides, paddings, dilations and groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is in MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) have two elements each. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
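
For example (illustrative values): with $H_{in} = 5$, pad_height_top = pad_height_bottom = 1,
$H_f = 3$, dilation 1 and stride 2, $H_{out} = (5 + 1 + 1 - 3) / 2 + 1 = 3$.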
)DOC");
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC. Where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
            "It has same data fromat and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) If use addto strategy or not, only used in "
      "cudnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCDHW) Only used in "
      "An optional string from: \"NDHWC\", \"NCDHW\". "
      "Defaults to \"NDHWC\". Specify the data format of the output data, "
519
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution, whether to enable exhaustive search "
                "for the cuDNN convolution algorithm or not. Default is False.")
      .SetDefault(false);
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and the strides, paddings, dilations and groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) have three elements each. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
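
For example (illustrative values): with $D_{in} = H_{in} = W_{in} = 5$, symmetric padding of 1
on every side, filter size 3, dilation 1 and stride 2 in each dimension, every spatial output
size is $(5 + 1 + 1 - 3) / 2 + 1 = 3$.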
)DOC");
  Apply();
}

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
  auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, data_type)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(data_type, ctx.GetPlace(), layout_,
                                      library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input and the output gradient require reshaping;
  // weights and bias already have their shapes in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for pool
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

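/*
 * Inputs:  I, W, Bias, dO
 * Outputs: dI, dW, dBias
 */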
template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("Bias", this->Input("Bias"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    op->SetAttrMap(this->Attrs());
  }
};

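/*
 * Inputs:  I, W, dO (and ResidualData when present)
 * Outputs: dI, dW
 */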
template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

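    // DDOutput and DFilter are produced only when ddI is fed, and DInput
    // only when ddW is fed; a missing second-order input yields an empty
    // gradient output.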
    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

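  // Shapes mirror the forward definitions: ddO takes dO's shape, dW takes
  // W's shape and dI takes I's shape, each set only when the required
  // second-order input is present.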
  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);
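
// Only the reference GemmConv CPU kernels are registered in this file; the
// CUDA/cuDNN kernels for these operators are registered in the corresponding
// GPU sources.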

REGISTER_OP_VERSION(conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(conv3d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));