/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/conv_op.h"

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/miopen_helper.h"
#endif

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"

namespace paddle {
namespace operators {

std::vector<int64_t> ConvOp::ComputeOutputShape(
    framework::InferShapeContext* ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
  OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  std::string padding_algorithm =
      ctx->Attrs().Get<std::string>("padding_algorithm");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
  const std::string data_format = ctx->Attrs().Get<std::string>("data_format");

  // MKL-DNN kernels use the NCHW order of dims description,
  // so we ignore data_format for the MKL-DNN kernel.
  const bool channel_last = (this->IsMKLDNNType() == false) &&
                            (data_format == "NHWC" || data_format == "NDHWC");

  PADDLE_ENFORCE_EQ(
      in_dims.size() == 4 || in_dims.size() == 5, true,
      platform::errors::InvalidArgument(
          "The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
          "received: input's dimension is %u, input's shape is [%s].",
          in_dims.size(), in_dims));

  PADDLE_ENFORCE_EQ(
      in_dims.size(), filter_dims.size(),
      platform::errors::InvalidArgument(
          "The input's dimension and filter's dimension of "
          "Op(Conv) should be equal. But received: the input's shape is [%s], "
          "the input's dimension is %d; the filter's shape is [%s],  "
          "the filter's dimension is %d.",
          in_dims, in_dims.size(), filter_dims, filter_dims.size()));

  int in_sub_stride_size = in_dims.size() - strides.size();
  PADDLE_ENFORCE_EQ(
      in_dims.size(), strides.size() + 2U,
      platform::errors::InvalidArgument(
          "The difference of input's dimension and Attr(strides)'s "
          "length must be equal to 2 for Op(Conv). "
          "But received: input's dimension is %d, input's shape is [%s]; "
          "Attr(stride)'s length is %d, Attr(stride) is [%s]; "
          "difference of input's dimension and Attr(strides)'s length = %u.",
          in_dims.size(), in_dims, strides.size(),
          framework::make_ddim(strides), in_sub_stride_size));

  const auto input_channels =
      channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];

  PADDLE_ENFORCE_EQ(
      input_channels, filter_dims[1] * groups,
      platform::errors::InvalidArgument(
          "The number of input's channels should be equal to filter's channels "
          "* groups for Op(Conv). But received: the input's channels is %d, "
          "the input's shape is [%s]; the filter's channels is %d, the "
          "filter's shape is [%s]; the groups is %d, the data_format is %s. "
          "The error may come from wrong data_format setting.",
          input_channels, in_dims, filter_dims[1], filter_dims, groups,
          data_format));
  PADDLE_ENFORCE_EQ(
      filter_dims[0] % groups, 0,
      platform::errors::InvalidArgument(
          "The number of output's channels (filter's first dimension) of "
          "Op(Conv) should be divisible by groups. But received: "
          "the output channels is %d, the filter's shape is [%s], "
          "the groups is %d.",
          filter_dims[0], filter_dims, groups));

  framework::DDim in_data_dims;
  if (channel_last) {
    in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
  } else {
    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
  }

  framework::DDim filter_data_dims =
      framework::slice_ddim(filter_dims, 2, filter_dims.size());

  std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                           in_data_dims, strides, ksize);

  std::vector<int64_t> output_shape({in_dims[0]});
  if (!channel_last) {
    output_shape.push_back(filter_dims[0]);
  }
  for (int i = 0; i < in_data_dims.size(); ++i) {
    if ((!ctx->IsRuntime()) &&
        (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
      output_shape.push_back(-1);
    } else {
      output_shape.push_back(
          ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
                         paddings[2 * i], paddings[2 * i + 1], strides[i]));
    }
  }
  if (channel_last) {
    output_shape.push_back(filter_dims[0]);
  }

  return output_shape;
}
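
// Illustrative example (assumed values, not part of the op definition): with
// data_format "NHWC" (non-MKL-DNN), an input of shape [8, 32, 32, 3], a filter
// of shape [16, 3, 3, 3], strides {1, 1}, dilations {1, 1} and paddings
// {1, 1, 1, 1}, each spatial dim stays at (32 + 1 + 1 - 3) / 1 + 1 = 32 and
// the filter count is appended last, so ComputeOutputShape returns
// {8, 32, 32, 16}.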

framework::OpKernelType ConvOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
  std::string data_format =
      "AnyLayout";  // TODO: enable data layout when it's ready
  framework::DataLayout layout = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, input_data_type)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
    customized_type_value =
        (input_data_type == framework::DataTypeTrait<int8_t>::DataType() ||
         input_data_type == framework::DataTypeTrait<uint8_t>::DataType())
            ? kConvMKLDNNINT8
            : kConvMKLDNNFP32;
  }
#endif

  if (input_data_type != framework::proto::VarType::INT8 &&
      input_data_type != framework::proto::VarType::UINT8 &&
      input_data_type != framework::proto::VarType::BF16) {
    auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
    PADDLE_ENFORCE_EQ(
        input_data_type, filter_data_type,
        platform::errors::InvalidArgument(
            "input and filter data type should be consistent, "
            "but received input data type is %s and filter type "
            "is %s",
            paddle::framework::DataTypeToString(input_data_type),
            paddle::framework::DataTypeToString(filter_data_type)));
  }
  if (input_data_type == framework::proto::VarType::FP16) {
    PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
                      platform::errors::InvalidArgument(
                          "float16 can only be used when CUDNN is used"));
  }

  auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                      library, customized_type_value);
  return type;
}

framework::OpKernelType ConvOp::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; weights and
  // bias already have their shape in NCHW order
  if ((var_name == "Input") &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

void Conv2DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput("Input",
           "(Tensor) The input tensor of convolution operator. "
           "The format of input tensor is NCHW or NHWC, where N is batch size, "
           "C is the "
           "number of channels, H is the height of the feature, "
           "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "H is the height of the filter, and W is the width of the filter. "
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("Bias",
           "(Tensor) Bias to be added to each output of filter application. "
           "The bias is a one-dimensional tensor with size equal "
           "to the number of output channels. Only used with MKL-DNN.")
      .AsDispensable();
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator. "
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int> default:{1, 1}), the "
                            "strides(h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "(vector<int> default:{0, 0}), the "
                            "paddings(pad_height_top, pad_height_bottom, "
                            "pad_width_left, pad_width_right) of "
                            "convolution operator.")
      .SetDefault({0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int, default:1), the number of groups of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1}), the "
                            "dilations(h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("fuse_relu_before_depthwise_conv",
                "(bool, default false) Only used in cuda depthwise kernel")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>(
      "use_quantizer",
      "(bool, default false) "
      "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
      .SetDefault(false);
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_brelu",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<float>("fuse_brelu_threshold",
                 "(float, default 6.0) Only used in mkldnn kernel")
      .SetDefault(6.0f);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy; only used in "
      "cudnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever the convolution output is used as an input to a "
                "residual connection.")
      .SetDefault(false);
  AddAttr<float>("Scale_in",
                 "Scale_in to be used for int8 input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_out",
                 "Scale_out to be used for int8 output data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<float>("Scale_in_eltwise",
                 "Scale_in_eltwise to be used for int8 eltwise input data."
                 "Only used with MKL-DNN INT8.")
      .SetDefault(1.0f);
  AddAttr<std::vector<float>>("Scale_weights",
                              "Scale_weights to be used for int8 weights data."
                              "Only used with MKL-DNN INT8.")
      .SetDefault({1.0f});
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Force INT8 kernel output FP32, only "
                "used in MKL-DNN INT8")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCHW\") An optional string from: \"NHWC\", "
      "\"NCHW\". Specify the data format of the output data; "
      "the input will be transformed automatically. ")
      .SetDefault("NCHW");
  // TODO(dzhwinter): need to register a layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Need to set use_cudnn to true. "
               "Workspace size for cudnn, in MB. The workspace is a section "
               "of GPU memory that is allocated/freed each time the operator "
               "runs; a larger workspace size can increase performance but "
               "also requires better hardware. This size should be chosen "
               "carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution; whether to enable an exhaustive search for the "
                "best cuDNN convolution algorithm. Default is False.")
      .SetDefault(false);

  AddComment(R"DOC(
Convolution Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature.
Filters(Input) is in MCHW format, where M is the number of output image channels, C is
the number of input image channels, H is the height of the filter, and W
is the width of the filter.
Parameters (strides, paddings, dilations) have two elements each. These two elements represent
height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, H_{out}, W_{out})$
  Where
$$
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC");
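  // Illustrative example of the formulas above (assumed values, not part of
  // the op definition): with H_in = W_in = 28, a 5x5 filter, paddings
  // {2, 2, 2, 2}, dilations {1, 1} and strides {1, 1}:
  //   H_out = (28 + 2 + 2 - (1 * (5 - 1) + 1)) / 1 + 1 = 28
  //   W_out = (28 + 2 + 2 - (1 * (5 - 1) + 1)) / 1 + 1 = 28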
  Apply();
}

void Conv3DOpMaker::Make() {
  AddAttr<bool>("is_test",
                "(bool, default false) Set to true for inference only, false "
                "for training. Some layers may run faster when this is true.")
      .SetDefault(false);
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution operator. "
      "The format of input tensor is NCDHW or NDHWC. Where N is batch size, C "
      "is the "
      "number of channels, D is the depth of the feature, H is the height of "
      "the feature, "
      "and W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution operator. "
           "The format of the filter tensor is MCDHW, where M is the number of "
           "output image channels, C is the number of input image channels, "
           "D is the depth of the filter, H is the height of the filter, and W "
           "is the width of the filter."
           "If the groups attribute is greater than 1, C equals the number of "
           "input image channels divided by the groups.");
  AddInput("ResidualData",
           "(Tensor) Tensor with residual data "
           "to which convolution output will be added."
           "Used with fuse_residual_connection fusion.")
      .AsDispensable();
  AddOutput("Output",
            "(Tensor) The output tensor of convolution operator."
            "It has the same data format and data type as the Input.");
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default:{1, 1, 1}), the "
                            "strides(d_stride, h_stride, w_stride) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default:{0, 0, 0}), the "
      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
      "operator.")
      .SetDefault({0, 0, 0});
  AddAttr<std::string>(
      "padding_algorithm",
      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
      .SetDefault("EXPLICIT");
  AddAttr<int>(
      "groups",
      "(int, default:1), the number of groups of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
      "is only connected to the second half of the input channels.")
      .SetDefault(1);
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
                            "convolution operator.")
      .SetDefault({1, 1, 1});
  AddAttr<bool>(
      "use_cudnn",
      "(bool, default false) Only used in cudnn kernel, need install cudnn")
      .SetDefault(false);
  AddAttr<bool>("use_mkldnn",
                "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>(
      "mkldnn_data_type",
      "(string, default \"float32\"). Data type of mkldnn kernel")
      .SetDefault("float32")
      .InEnum({"float32", "int8", "bfloat16"});
  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
      .SetDefault(false);
  AddAttr<std::string>("fuse_activation",
                       "(string, default \"\") Only used in mkldnn kernel")
      .SetDefault("");
  AddAttr<float>("fuse_alpha",
                 "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<float>("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel")
      .SetDefault(0.0f);
  AddAttr<bool>(
      "use_addto",
      "(bool, default false) Whether to use the addto strategy; only used in "
      "cudnn kernel")
      .SetDefault(false);
  AddAttr<bool>("fuse_residual_connection",
                "(bool, default false) Only used in mkldnn kernel. Used "
                "whenever the convolution output is used as an input to a "
                "residual connection.")
      .SetDefault(false);
  AddAttr<std::string>(
      "data_format",
      "(string, default \"NCDHW\") An optional string from: \"NDHWC\", "
      "\"NCDHW\". Specify the data format of the output data; "
      "the input will be transformed automatically. ")
      .SetDefault("NCDHW");
  AddAttr<bool>("force_fp32_output",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  // TODO(dzhwinter): need to register a layout transform function
  AddAttr<int>("workspace_size_MB",
               "Only used in cudnn kernel. Workspace size for cudnn, in MB. "
               "The workspace is a section of GPU memory that is "
               "allocated/freed each time the operator runs; a larger "
               "workspace size can increase performance but also requires "
               "better hardware. This size should be chosen carefully.")
      .SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
  AddAttr<bool>("exhaustive_search",
                "(bool, default false) cuDNN has many algorithms to compute "
                "convolution; whether to enable an exhaustive search for the "
                "best cuDNN convolution algorithm. Default is False.")
      .SetDefault(false);
  AddComment(R"DOC(
Convolution3D Operator.

The convolution operation calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, D is the depth of the feature, H is the height of
the feature, and W is the width of the feature.
Filters(Input) is MCDHW format, where M is the number of output image channels,
C is the number of input image channels, D is the depth of the filter,
H is the height of the filter, and W is the width of the filter.
Parameters (strides, paddings, dilations) have three elements each. These three elements
represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
  Where
  $$
       D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
       H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
       W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
  $$
)DOC");
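  // Illustrative example of the depth formula above (assumed values, not part
  // of the op definition): with D_in = 16, filter depth D_f = 3,
  // pad_depth_front = pad_depth_back = 1, dilation 1 and stride 1,
  //   D_out = (16 + 1 + 1 - (1 * (3 - 1) + 1)) / 1 + 1 = 16.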
  Apply();
}

void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
  }
}

framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
  auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
      this->CanMKLDNNBeUsed(ctx, data_type)) {
    const std::string data_format = ctx.Attr<std::string>("data_format");
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
    customized_type_value = kConvMKLDNNFP32;
  }
#endif

  auto type = framework::OpKernelType(data_type, ctx.GetPlace(), layout_,
                                      library_, customized_type_value);
  return type;
}

framework::OpKernelType ConvOpGrad::GetKernelTypeForVar(
    const std::string& var_name, const Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only the input requires reshaping; weights and
  // bias already have their shape in NCHW order
  if (((var_name == "Input") ||
       (var_name == framework::GradVarName("Output"))) &&
      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
      (tensor.layout() != framework::DataLayout::kMKLDNN)) {
    auto attrs = Attrs();
    auto ar = paddle::framework::AttrReader(attrs);
    const std::string data_format = ar.Get<std::string>("data_format");
    auto dl = framework::StringToDataLayout(data_format);
    // Some models may have intentionally set "AnyLayout" for conv
    // op. Treat this as NCHW (default data_format value)
    if (dl != framework::DataLayout::kAnyLayout) {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place(), dl);
    }
  }
#endif
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

template <typename T>
class Conv2DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("Bias", this->Input("Bias"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));
    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
    op->SetAttrMap(this->Attrs());
  }
};

template <typename T>
class Conv3DGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));

    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
    op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter"));

    if (this->HasInput("ResidualData")) {
      op->SetInput("ResidualData", this->Input("ResidualData"));
    }

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    // ddO, dI, dW
    // Unlike grad op, double grad op does not use name@GRAD@GRAD
    // as key of ops' inputs and outputs.
    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

/*
 * Inputs:  I, W, dO, ddI, ddW
 * Outputs: ddO, dW, dI
 */
template <typename T>
class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    // I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(framework::GradVarName("Output")));
    op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input")));
    op->SetInput("DDFilter",
                 this->OutputGrad(framework::GradVarName("Filter")));

    auto ddx = this->OutputGrad(framework::GradVarName("Input"));
    auto ddw = this->OutputGrad(framework::GradVarName("Filter"));

    op->SetOutput("DDOutput",
                  ddx.empty()
                      ? this->EmptyInputGrad()
                      : this->InputGrad(framework::GradVarName("Output")));
    op->SetOutput("DFilter", ddx.empty() ? this->EmptyInputGrad()
                                         : this->InputGrad("Filter"));
    op->SetOutput("DInput", ddw.empty() ? this->EmptyInputGrad()
                                        : this->InputGrad("Input"));

    op->SetAttrMap(this->Attrs());
  }
};

void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
  auto x_dims = ctx->GetInputDim("Input");
  auto w_dims = ctx->GetInputDim("Filter");
  auto do_dims = ctx->GetInputDim("DOutput");

  if (ctx->HasOutput("DDOutput") &&
      (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
    ctx->SetOutputDim("DDOutput", do_dims);
  }
  if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
    ctx->SetOutputDim("DFilter", w_dims);
  }
  if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
    ctx->SetOutputDim("DInput", x_dims);
  }
}

framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  int customized_type_value =
      framework::OpKernelType::kDefaultCustomizedTypeValue;
  framework::LibraryType library_{framework::LibraryType::kPlain};
  std::string data_format = "AnyLayout";
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::CanCUDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kCUDNN;
  }
#endif
  auto type = framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout_, library_, customized_type_value);
  return type;
}

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv2DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
                  ops::Conv2DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);

REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
                  ops::ConvOpInferVarType,
                  ops::Conv3DGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad,
                  ops::Conv3DDoubleGradMaker<paddle::framework::OpDesc>,
                  ops::Conv3DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad);

// depthwise conv kernel
// TODO(xingzhaolong): neon kernel for mobile
REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    depthwise_conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv2d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv2d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    conv3d, ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    conv3d_grad_grad,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::GemmConvDoubleGradKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_VERSION(conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(depthwise_conv2d)
    .AddCheckpoint(
        R"ROC(
      Upgrade depthwise_conv2d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));

REGISTER_OP_VERSION(conv3d)
    .AddCheckpoint(
        R"ROC(
      Upgrade conv3d, add a new attribute [use_addto].
    )ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "use_addto",
            "In order to support new feature (inplace addto strategy) for "
            "gradient accumulation.",
            false));