conv_op.cc

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/phi/infermeta/binary.h"

namespace paddle {
namespace operators {

std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  int dilation_size = dilations.size();
  for (int i = 0; i < dilation_size; ++i) {
    PADDLE_ENFORCE_GT(
        dilations[i],
        0,
        platform::errors::InvalidArgument(
            "The dilation of Op(Conv) should be larger than 0, but received "
            "dilation is %d.",
            dilations[i]));
  }
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN Kernels are using NCHW order of dims description
  // so we ignore data_format consideration for MKL-DNN kernel
  const bool channel_last = (ctx->IsRunMKLDNNKernel() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5,
      true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %u, input's shape is [%s].",
          in_dims.size(),
          in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(),
      filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s],  "
          "the filter's dimension is %d.",
          in_dims,
          in_dims.size(),
          filter_dims,
          filter_dims.size()));

  int stride_size = strides.size();
  for (int i = 0; i < stride_size; ++i) {
    PADDLE_ENFORCE_GT(
        strides[i],
        0,
        platform::errors::InvalidArgument(
            "The stride of Op(Conv) should be larger than 0, but received "
            "stride is %d.",
            strides[i]));
  }

  int in_sub_stride_size = in_dims.size() - stride_size;
  PADDLE_ENFORCE_EQ(
      in_dims.size(),
      strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be equal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimension and Attr(strides)'s length = %u.",
          in_dims.size(),
          in_dims,
          strides.size(),
          phi::make_ddim(strides),
          in_sub_stride_size));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels,
      filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels,
          in_dims,
          filter_dims[1],
          filter_dims,
          groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups,
      0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divisible by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0],
          filter_dims,
          groups));

  if (ctx->IsRuntime()) {
    PADDLE_ENFORCE_GT(
        filter_dims[0],
        0,
        platform::errors::InvalidArgument(
            "the size of filter at axis 0 should be greater than 0"));
  }

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
  }

  framework::DDim filter_data_dims =
      phi::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
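  // Worked example of the spatial-size arithmetic below (illustrative
  // numbers, not from the original source): with in_data_dims[i] = 32,
  // ksize[i] = 3, dilation 1, paddings (1, 1) and stride 2, ConvOutputSize
  // returns (32 + 1 + 1 - ((3 - 1) * 1 + 1)) / 2 + 1 = 16.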

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(ConvOutputSize(in_data_dims[i],
                                            filter_data_dims[i],
                                            dilations[i],
                                            paddings[2 * i],
                                            paddings[2 * i + 1],
                                            strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // todo enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, input_data_type)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
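    // Pick the MKL-DNN kernel variant from the input/filter data types:
    // int8/uint8 input with an int8 filter -> kConvMKLDNNINT8WS8 (signed
    // weights), int8/uint8 input otherwise -> kConvMKLDNNINT8, and
    // everything else -> kConvMKLDNNFP32.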
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? OperatorWithKernel::IndicateVarDataType(ctx, "Filter") ==
                      framework::DataTypeTrait<int8_t>::DataType()
                  ? kConvMKLDNNINT8WS8
                  : kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8 &&
      input_data_type != framework::proto::VarType::BF16) {
    auto filter_data_type =
        framework::TransToProtoVarType(ctx.Input<Tensor>("Filter")->dtype());
    PADDLE_ENFORCE_EQ(
        input_data_type,
        filter_data_type,
        platform::errors::InvalidArgument(
            "input and filter data type should be consistent, "
            "but received input data type is %s and filter type "
            "is %s",
            paddle::framework::DataTypeToString(input_data_type),
            paddle::framework::DataTypeToString(filter_data_type)));
  }
// #ifndef PADDLE_WITH_ASCEND_CL
//   if (input_data_type == framework::proto::VarType::FP16) {
//     PADDLE_ENFORCE_EQ(
//         library, framework::LibraryType::kCUDNN,
//         platform::errors::InvalidArgument(
//             "float16 can only be used when CUDNN or NPU is used"));
//   }
// #endif
#if PADDLE_WITH_CUDA
  if (input_data_type == framework::proto::VarType::BF16 &&
      library == framework::LibraryType::kCUDNN) {
    PADDLE_ENFORCE_GE(
        platform::DnnVersion(),
        8100,
        platform::errors::InvalidArgument(
            "bfloat16 can only be used when CUDNN_VERSION >= 8100"));
  }
#endif  // PADDLE_WITH_CUDA

  auto type = framework::OpKernelType(
      input_data_type, ctx.GetPlace(), layout, library, customized_type_value);
  return type;
}

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name,
    const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(
          expected_kernel_type.data_type_, tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(
      expected_kernel_type.data_type_, tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application."
           "The format of output tensor is X (one-dimensional) of size equal"
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable()
      .AsExtra();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_wifth_right)  of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::string>(
      "data_format",
      "(string, default NCHW) Only used in "
      "An optional string from: \"NHWC\", \"NCHW\". "
      "Defaults to \"NHWC\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need set use_cudnn to true."
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is in MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) each have two elements. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
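
  A concrete instance of the formula (illustrative numbers): with $H_{in} = 5$,
  $H_f = 3$, dilations[0] = 1, pad_height_top = pad_height_bottom = 1 and
  strides[0] = 1, $H_{out} = (5 + 1 + 1 - 3) / 1 + 1 = 5$.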
)DOC");
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false)
      .AsExtra();
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC, where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable()
      .AsExtra();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"})
      .AsExtra();
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("")
      .AsExtra();
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f)
      .AsExtra();
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) If use addto strategy or not, only used in "
      "cudnn kernel")
      .SetDefault(false)
      .AsExtra();
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false)
      .AsExtra();
  AddAttr<std::string>(
      "data_format",
      "(string, default NCDHW) An optional string from: \"NDHWC\", \"NCDHW\". "
      "Defaults to \"NCDHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false)
      .AsExtra();
  // TODO(dzhwinter): need to registered layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB())
      .AsExtra();
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false)
      .AsExtra();
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is in MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) each have three elements. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
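
  A concrete instance (illustrative numbers): with $D_{in} = 8$, $D_f = 3$,
  dilations[0] = 1, pad_depth_front = pad_depth_back = 1 and strides[0] = 2,
  $D_{out} = (8 + 1 + 1 - 3) / 2 + 1 = 4$.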
)DOC");
  Apply();
}

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
  auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, data_type)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(
      data_type, ctx.GetPlace(), layout_, library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name,
    const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input require reshaping, weights and
  // bias are having shape in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for pool
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(
          expected_kernel_type.data_type_, tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(
      expected_kernel_type.data_type_, tensor.place(), tensor.layout());
}

template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("Bias")) {
      op->SetInput("Bias", this->Input("Bias"));
      op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    }
    op->SetAttrMap(this->Attrs());
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput(
        "DFilter",
        ddx.empty() ? this->EmptyInputGrad() : this->InputGrad("Filter"));
    op->SetOutput(
        "DInput",
        ddw.empty() ? this->EmptyInputGrad() : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput(
        "DFilter",
        ddx.empty() ? this->EmptyInputGrad() : this->InputGrad("Filter"));
    op->SetOutput(
        "DInput",
        ddw.empty() ? this->EmptyInputGrad() : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"),
      ctx.GetPlace(),
      layout_,
      library_,
      customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d,
                  ops::ConvOp,
                  ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad,
                  ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d,
                  ops::ConvOp,
                  ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad,
                  ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OPERATOR(conv3d,
                  ops::ConvOp,
                  ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad,
                  ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OP_VERSION(conv2d).AddCheckpoint(
    R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
    paddle::framework::compatible::OpVersionDesc().NewAttr(
        "use_addto",
        "In order to support new feature (inplace addto strategy) for "
        "gradient accumulation.",
        false));

REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(conv3d).AddCheckpoint(
    R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
    paddle::framework::compatible::OpVersionDesc().NewAttr(
        "use_addto",
        "In order to support new feature (inplace addto strategy) for "
        "gradient accumulation.",
        false));