/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true,
                    "Input(Input) of ConvOp should not be null.");
  PADDLE_ENFORCE_EQ(ctx->HasInput("Filter"), true,
                    "Input(Filter) of ConvOp should not be null.");
  PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
                    "Output(Output) of ConvOp should not be null.");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN kernels use the NCHW order of dims description,
  // so we ignore the data_format attribute for the MKL-DNN kernel
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      "ShapeError: the input of Op(conv) should be 4-D or 5-D Tensor. But "
      "received: %u-D Tensor, the shape of input is [%s].",
      in_dims.size(), in_dims);

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      "ShapeError: the input's dimension size and filter's dimension size of "
      "Op(conv) should be equal. But received: the shape of input is [%s], "
      "the dimension size of input is [%d], the shape of filter is [%s],  "
      "the dimension size of filter is [%d].",
      in_dims, in_dims.size(), filter_dims, filter_dims.size());

  int in_sub_stride_size = in_dims.size() - strides.size();
  PADDLE_ENFORCE_EQ(in_dims.size() - strides.size() == 2U, true,
                    "ShapeError: the dimension size of input minus the size of "
                    "Attr(stride) must be equal to 2 for Op(conv). "
                    "But received: the dimension size of input minus the size "
                    "of Attr(stride) is [%d], the "
                    "input's dimension size is [%d], the shape of input "
                    "is [%s], the Attr(stride)'s size is [%d].",
                    in_sub_stride_size, in_dims.size(), in_dims,
                    strides.size());

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      "ShapeError: The number of input channels should be equal to filter "
      "channels * groups for Op(conv). But received: the input's channels is "
      "[%d], the shape "
      "of input is [%s], the filter's channel is [%d], the shape of filter is "
      "[%s], the groups is [%d], the data_format is %s. The error may come "
      "from wrong data_format setting.",
      input_channels, in_dims, filter_dims[1], filter_dims, groups,
      data_format);
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      "ShapeError: The number of output channels of Op(conv) should be "
      "divisible by groups. "
      "But received: the output channels is [%d], the shape of filter is [%s] "
      "(the first dimension of filter is output channel), the groups is [%d].",
      filter_dims[0], filter_dims, groups);

  framework::DDim in_data_dims;
  framework::DDim filter_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  filter_data_dims = framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
  ctx->ShareLoD("Input", "Output");
}
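// A rough sketch of the output-size helper used above. ConvOutputSize is
// declared in conv_op.h; the body below is an assumed reconstruction that
// matches the formula documented in the Conv2D/Conv3D AddComment blocks
// further down, not a copy of the actual implementation:
//
//   int ConvOutputSize(int input_size, int filter_size, int dilation,
//                      int padding_1, int padding_2, int stride) {
//     const int dkernel = dilation * (filter_size - 1) + 1;
//     return (input_size + padding_1 + padding_2 - dkernel) / stride + 1;
//   }
//
// i.e. the dilated kernel extent is folded into the usual
// (input + paddings - kernel) / stride + 1 computation.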

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // TODO: enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
                      "input and filter data type should be consistent");
  }
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
                      "float16 can only be used when CUDNN is used");
  }

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
  return type;
}

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; the weights and
  // bias already have their shape in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application. "
           "The format of output tensor is X (one-dimensional) of size equal "
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added. "
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_width_right) of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
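  // Assumed semantics of padding_algorithm (see UpdatePaddingAndDilation in
  // conv_op.h): "SAME" computes paddings so the output spatial size equals
  // ceil(input_size / stride), "VALID" forces all paddings to zero, and
  // "EXPLICIT" uses the "paddings" attribute as given.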
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("use_quantizer",
                "(bool, default false) "
                "Set to true for operators that should be quantized and use "
                "int8 kernel. "
                "Only used on CPU.")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false);
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data. "
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data. "
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data. "
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data. "
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f});
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCHW) An optional string from: \"NHWC\", \"NCHW\". "
      "Defaults to \"NCHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need to set use_cudnn to true. "
               "workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false);

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is in MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters(strides, paddings, dilations) are two elements. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
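  // A quick numeric check of the output-size formula documented above
  // (illustrative only): with H_in = 7, H_f = 3, dilations[0] = 2,
  // strides[0] = 2 and (pad_height_top, pad_height_bottom) = (1, 1):
  //   dilated kernel extent = 2 * (3 - 1) + 1 = 5
  //   H_out = (7 + 1 + 1 - 5) / 2 + 1 = 3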
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC. Where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added. "
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever convolution output is as an input to residual "
                "connection.")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default NCDHW) An optional string from: \"NDHWC\", \"NCDHW\". "
      "Defaults to \"NCDHW\". Specify the data format of the output data, "
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  // TODO(dzhwinter): need to register layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
               "workspace is a section of GPU memory which will be "
               "allocated/freed each time the operator runs, larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to calculate "
                "convolution, whether to enable exhaustive search "
                "for cuDNN convolution or not, default is False.")
      .SetDefault(false);
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters(strides, paddings, dilations) are three elements. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
)DOC");
  Apply();
}

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; the weights and
  // bias already have their shape in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("Bias", this->Input("Bias"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    op->SetAttrMap(this->Attrs());
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
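/*
 * Naming used by the double-grad makers below (taken from the SetInput /
 * SetOutput calls): "DOutput" is the Output gradient fed into the first
 * backward pass, "DDInput"/"DDFilter" are the gradients of the first
 * backward's Input/Filter gradients, and "DDOutput"/"DFilter"/"DInput" are
 * the outputs this double-grad op produces.
 */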
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#ifdef PADDLE_WITH_CUDA
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);