/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/miopen_helper.h"
#endif

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  int dilation_size = dilations.size();
  for (int i = 0; i < dilation_size; ++i) {
    PADDLE_ENFORCE_GT(
        dilations[i], 0,
        platform::errors::InvalidArgument(
            "The dilation of Op(Conv) should be larger than 0, but received "
            "dilation is %d.",
            dilations[i]));
  }
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN Kernels are using NCHW order of dims description
  // so we ignore data_format consideration for MKL-DNN kernel
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %u, input's shape is [%s].",
          in_dims.size(), in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s], "
          "the filter's dimension is %d.",
          in_dims, in_dims.size(), filter_dims, filter_dims.size()));

  int stride_size = strides.size();
  for (int i = 0; i < stride_size; ++i) {
    PADDLE_ENFORCE_GT(
        strides[i], 0,
        platform::errors::InvalidArgument(
            "The stride of Op(Conv) should be larger than 0, but received "
            "stride is %d.",
            strides[i]));
  }

  int in_sub_stride_size = in_dims.size() - stride_size;
  PADDLE_ENFORCE_EQ(
      in_dims.size(), strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be equal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimension and Attr(strides)'s length = %u.",
          in_dims.size(), in_dims, strides.size(),
          framework::make_ddim(strides), in_sub_stride_size));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels, in_dims, filter_dims[1], filter_dims, groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divisible by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0], filter_dims, groups));

  if (ctx->IsRuntime()) {
    PADDLE_ENFORCE_GT(
        filter_dims[0], 0,
        platform::errors::InvalidArgument(
            "the size of filter at axis 0 should be greater than 0"));
  }

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}
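
// Worked example of the ConvOutputSize arithmetic used above (a sketch of the
// usual formula, out = (in + pad_left + pad_right - (dilation * (k - 1) + 1)) /
// stride + 1, which matches the H_out/W_out formulas in the op docs below):
// with in = 7, k = 3, dilation = 1, pad_left = pad_right = 1 and stride = 2,
// out = (7 + 1 + 1 - 3) / 2 + 1 = 4.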

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // todo enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, input_data_type)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8 &&
      input_data_type != framework::proto::VarType::BF16) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(
        input_data_type, filter_data_type,
        platform::errors::InvalidArgument(
            "input and filter data type should be consistent, "
            "but received input data type is %s and filter type "
            "is %s",
            paddle::framework::DataTypeToString(input_data_type),
            paddle::framework::DataTypeToString(filter_data_type)));
  }
#ifndef PADDLE_WITH_ASCEND_CL
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(
        library, framework::LibraryType::kCUDNN,
        platform::errors::InvalidArgument(
            "float16 can only be used when CUDNN or NPU is used"));
  }
#endif
#if PADDLE_WITH_CUDA
  if (input_data_type == framework::proto::VarType::BF16 &&
      library == framework::LibraryType::kCUDNN) {
    PADDLE_ENFORCE_GE(
        platform::CudnnVersion(), 8100,
        platform::errors::InvalidArgument(
            "bfloat16 can only be used when CUDNN_VERSION >= 8100"));
  }
#endif  // PADDLE_WITH_CUDA

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
  return type;
}
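
// Summary of the kernel dispatch above: cuDNN is chosen whenever
// platform::CanCUDNNBeUsed(ctx) allows it; otherwise the MKL-DNN kernel is
// tried (with int8/uint8 inputs mapped to the INT8 kernel variant); float16
// input requires the cuDNN library unless built with PADDLE_WITH_ASCEND_CL,
// and bfloat16 on cuDNN additionally requires CUDNN_VERSION >= 8100.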

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application."
           "The format of output tensor is X (one-dimensional) of size equal"
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable()
      .AsExtra();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_width_right) of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>(
      "use_quantizer",
      "(bool, default false) "
      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"})
      .AsExtra();
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f)
      .AsExtra();
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("")
      .AsExtra();
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy, only used in "
      "cudnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is used as an input to residual "
                "connection.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f)
      .AsExtra();
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f})
      .AsExtra();
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCHW\") An optional string from: \"NHWC\", "
      "\"NCHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need to set use_cudnn to true. "
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is MCHW format. Where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) have two elements each. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
  Apply();
}
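
// Channel bookkeeping example for the "groups" attribute documented above
// (numbers are illustrative only): with data_format NCHW, an input of shape
// (N=8, C=16, H=32, W=32), groups=4 and a filter of shape (M=32, C/groups=4,
// H_f=3, W_f=3), ComputeOutputShape checks that 16 == 4 * 4
// (input channels == filter channels * groups) and that 32 % 4 == 0
// (output channels divisible by groups).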

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC. Where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"})
      .AsExtra();
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("")
      .AsExtra();
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy, only used in "
      "cudnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is used as an input to residual "
                "connection.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCDHW\") An optional string from: \"NDHWC\", "
      "\"NCDHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false)
      .AsExtra();
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) have three elements each. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
)DOC");
  Apply();
}
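
// Worked instance of the D_out/H_out/W_out formulas above (illustrative
// numbers only): input (N=2, C_in=4, D=16, H=16, W=16), filter
// (C_out=8, C_in=4, 3, 3, 3), strides {1, 1, 1}, dilations {1, 1, 1} and
// paddings {1, 1, 1, 1, 1, 1} give D_out = (16 + 1 + 1 - 3) / 1 + 1 = 16,
// and likewise H_out = W_out = 16, so the output shape is (2, 8, 16, 16, 16).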

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
  auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, data_type)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(data_type, ctx.GetPlace(), layout_,
                                      library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("Bias")) {
      op->SetInput("Bias", this->Input("Bias"));
      op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    }
    op->SetAttrMap(this->Attrs());
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
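// Note on the naming used by the double-grad makers below: DOutput is the
// gradient of Output (dO); DDInput and DDFilter are the gradients of the
// Input and Filter gradients (ddI, ddW); DDOutput, DFilter and DInput are the
// corresponding outputs (ddO, dW, dI). As noted in Apply(), the double-grad
// op deliberately does not use the name@GRAD@GRAD key convention.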
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_VERSION(conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(conv3d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));