/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/miopen_helper.h"
#endif

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN kernels describe dims in NCHW order, so data_format is ignored
  // when an MKL-DNN kernel is used
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %u, input's shape is [%s].",
          in_dims.size(), in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s], "
          "the filter's dimension is %d.",
          in_dims, in_dims.size(), filter_dims, filter_dims.size()));

  int stride_size = strides.size();
  for (int i = 0; i < stride_size; ++i) {
    PADDLE_ENFORCE_GT(
        strides[i], 0,
        platform::errors::InvalidArgument(
            "The stride of Op(Conv) should be larger than 0, but received "
            "stride is %d.",
            strides[i]));
  }

  int in_sub_stride_size = in_dims.size() - stride_size;
  PADDLE_ENFORCE_EQ(
      in_dims.size(), strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be equal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimension and Attr(strides)'s length = %u.",
          in_dims.size(), in_dims, strides.size(),
          framework::make_ddim(strides), in_sub_stride_size));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels, in_dims, filter_dims[1], filter_dims, groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divisible by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0], filter_dims, groups));

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);
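  // Note (illustrative): UpdatePaddingAndDilation above rewrites `paddings`
  // and `dilations` according to `padding_algorithm`. Roughly, "VALID" resets
  // all paddings to zero, while "SAME" picks paddings so that each output
  // spatial size becomes ceil(input_size / stride); "EXPLICIT" keeps the
  // user-provided values. See UpdatePaddingAndDilation in conv_op.h for the
  // exact rules.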

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
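  // Worked example (illustrative): for one spatial dimension with
  // in_data_dim = 5, filter size = 3, dilation = 1, paddings = {1, 1} and
  // stride = 1, ConvOutputSize returns
  // (5 + 1 + 1 - (1 * (3 - 1) + 1)) / 1 + 1 = 5,
  // i.e. the same spatial size; with stride = 2 it would be 3 instead.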
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // todo enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, input_data_type)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8 &&
      input_data_type != framework::proto::VarType::BF16) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(
        input_data_type, filter_data_type,
        platform::errors::InvalidArgument(
            "input and filter data type should be consistent, "
            "but received input data type is %s and filter type "
            "is %s",
            paddle::framework::DataTypeToString(input_data_type),
            paddle::framework::DataTypeToString(filter_data_type)));
  }
#ifndef PADDLE_WITH_ASCEND_CL
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(
        library, framework::LibraryType::kCUDNN,
        platform::errors::InvalidArgument(
            "float16 can only be used when CUDNN or NPU is used"));
  }
#endif
#if PADDLE_WITH_CUDA
  if (input_data_type == framework::proto::VarType::BF16 &&
      library == framework::LibraryType::kCUDNN) {
    PADDLE_ENFORCE_GE(
        platform::CudnnVersion(), 8100,
        platform::errors::InvalidArgument(
            "bfloat16 can only be used when CUDNN_VERSION >= 8100"));
  }
#endif  // PADDLE_WITH_CUDA

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
  return type;
}

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; weights and
  // bias have their shape in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application."
           "The format of the bias tensor is X (one-dimensional) of size equal"
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable()
      .AsExtra();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_width_right) of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>(
      "use_quantizer",
      "(bool, default false) "
      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"})
      .AsExtra();
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f)
      .AsExtra();
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("")
      .AsExtra();
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy, only used in "
      "cudnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is used as an input to a residual "
                "connection.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f})
      .AsExtra();
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCHW\") An optional string from: \"NHWC\", "
      "\"NCHW\". Specify the data format of the output data; "
      "the input will be transformed automatically.")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need to set use_cudnn to true. "
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) have two elements. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
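  // Numeric example (illustrative) of the formula above: an input of shape
  // (N=1, C_in=3, H_in=5, W_in=5) convolved with a filter of shape
  // (C_out=6, C_in=3, H_f=3, W_f=3), strides {1, 1}, paddings {1, 1, 1, 1}
  // and dilations {1, 1} gives H_out = (5 + 1 + 1 - 3) / 1 + 1 = 5 and
  // W_out = 5, i.e. an output of shape (1, 6, 5, 5).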
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC, where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"})
      .AsExtra();
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("")
      .AsExtra();
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy, only used in "
      "cudnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is used as an input to a residual "
                "connection.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCDHW\") An optional string from: \"NDHWC\", "
      "\"NCDHW\". Specify the data format of the output data; "
      "the input will be transformed automatically.")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false)
      .AsExtra();
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) have three elements. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
)DOC");
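  // Numeric example (illustrative), analogous to the 2-D case: an input of
  // shape (1, 3, 5, 5, 5) and a filter of shape (6, 3, 3, 3, 3) with strides
  // {1, 1, 1}, paddings {1, 1, 1, 1, 1, 1} and dilations {1, 1, 1} give an
  // output of shape (1, 6, 5, 5, 5).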
  Apply();
}

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
  auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, data_type)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(data_type, ctx.GetPlace(), layout_,
                                      library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; weights and
  // bias have their shape in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("Bias", this->Input("Bias"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    op->SetAttrMap(this->Attrs());
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_VERSION(conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(conv3d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));