/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/activation_op.h"

#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"
#include "paddle/phi/backends/dynload/port.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/infermeta/backward.h"

PHI_DECLARE_bool(use_mkldnn);

namespace paddle {
namespace operators {

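// An activation's forward can run in place (Out reusing X's buffer) only when
// its gradient never needs the original input X, i.e. the grad functor depends
// on Out alone or on nothing at all.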
template <typename GradFunctor>
static constexpr bool CanInplaceAct() {
  return GradFunctor::FwdDeps() == ActBwdOpFwdDeps::kDepOut ||
         GradFunctor::FwdDeps() == ActBwdOpFwdDeps::kNoDeps;
}

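// Generates a boilerplate OpProtoAndCheckerMaker for a single-input activation:
// input "X", output "Out" with the same shape, plus the supplied doc comment.
// The OP_COMMENT strings are expected to come from the expansion sites
// (presumably in activation_op.h), e.g. a hypothetical
//   REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc);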
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)           \
  class OP_NAME##OpMaker                                            \
      : public ::paddle::framework::OpProtoAndCheckerMaker {        \
   public:                                                          \
    void Make() override {                                          \
      AddInput("X",                                                 \
               "Input of " #OP_NAME                                 \
               " operator, an N-D Tensor, with data type float32, " \
               "float64 or float16.");                              \
      AddOutput("Out",                                              \
                "Output of " #OP_NAME                               \
                " operator, a Tensor with shape same as input.");   \
      AddComment(OP_COMMENT);                                       \
    }                                                               \
  }

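// Builds the "<op>_grad" descriptor for both the static graph (OpDesc) and
// imperative (OpBase) paths. dOut is always wired to dX; the forward input X is
// forwarded only when the grad functor depends on X (or oneDNN may need it),
// and the forward output Out only when the grad functor depends on Out.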
template <ActBwdOpFwdDeps kDepValue, typename T>
class ActivationGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
    op->SetAttrMap(this->Attrs());

    if ((static_cast<int>(kDepValue) &
         static_cast<int>(ActBwdOpFwdDeps::kDepX)) ||
        FLAGS_use_mkldnn ||
        (op->HasAttr("use_mkldnn") &&
         PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn")))) {
      op->SetInput("X", this->Input("X"));  // x
    }

    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
      op->SetInput("Out", this->Output("Out"));  // out
    }
  }
};
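
// Composite backward for hard_swish: builds dX from X and dOut via the
// primitive-op decomposition in prim::hardswish_grad (used when composite
// gradients are enabled).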
class HardSwishCompositeGradOpMaker : public prim::CompositeGradOpMakerBase {
 public:
  using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase;

 protected:
  void Apply() override {
    paddle::Tensor x = this->GetSingleForwardInput("X");
    paddle::Tensor out_grad = this->GetSingleOutputGrad("Out");
    paddle::Tensor dx = this->GetSingleInputGrad("X");
    auto* dx_ptr = this->GetOutputPtr(&dx);
    std::string dx_name = this->GetOutputName(dx);
    VLOG(6) << "Running hardswish_grad composite func";
    prim::hardswish_grad<prim::DescTensor>(x, out_grad, dx_ptr);
    this->RecoverOutputName(dx, dx_name);
  }
};

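// Shared helper: choose the kernel key from the data type of the variable
// `name` and the place the op is executed on.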
phi::KernelKey GetKernelType(const framework::ExecutionContext& ctx,
                             const framework::OperatorWithKernel& oper,
                             const std::string& name) {
  auto data_type = oper.IndicateVarDataType(ctx, name);
  // FIXME(liuwei1031) temporarily disable the code to unblock users
  // TODO(liuwei1031) figure out the reason behind
  // https://github.com/PaddlePaddle/Paddle/issues/16096
  // and re-enable this in the future
  // #ifdef PADDLE_WITH_CUDA
  //   auto it1 = oper.Attrs().find("use_cudnn");
  //   if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) {
  //     library = framework::LibraryType::kCUDNN;
  //   }
  // #endif
  return phi::KernelKey(data_type, ctx.GetPlace());
}

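// Forward op shared by all activations registered here: Out is element-wise,
// so it simply shares X's dims and LoD, and the kernel is keyed on X's dtype.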
class ActivationOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->ShareDim("X", /*->*/ "Out");
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  phi::KernelKey GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, "X");
  }
};

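// Propagates X's data type and variable type to Out during static-graph
// compilation.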
class ActivationOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  std::unordered_map<std::string, std::string>& GetInputOutputWithSameType()
      const override {
    static std::unordered_map<std::string, std::string> m{{"X", /*->*/ "Out"}};
    return m;
  }
};

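// Generic grad op: dX shares dims/LoD with dOut, and the kernel is keyed on
// dOut's dtype.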
class ActivationOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto out_grad_name = framework::GradVarName("Out");
    ctx->ShareDim(out_grad_name, framework::GradVarName("X"));
    ctx->ShareLoD(out_grad_name, framework::GradVarName("X"));
  }

 protected:
  phi::KernelKey GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, framework::GradVarName("Out"));
  }
};

class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input of SoftRelu operator");
    AddOutput("Out", "Output of SoftRelu operator");
    AddAttr<float>("threshold", "The threshold value of SoftRelu")
        .SetDefault(40.0f);
    AddComment(R"DOC(
SoftRelu Activation Operator.

$$out = \ln(1 + \exp(\max(\min(x, threshold), -threshold)))$$

)DOC");
  }
};

class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input of Swish operator");
    AddOutput("Out", "Output of Swish operator");
    AddAttr<float>("beta", "Constant beta of swish operator").SetDefault(1.0f);
    AddComment(R"DOC(
Swish Activation Operator.

$$out = \frac{x}{1 + e^{-\beta x}}$$

)DOC");
  }
};

class MishOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input of Mish operator");
    AddOutput("Out", "Output of Mish operator");
    AddAttr<float>(
        "threshold",
        "Constant threshold of softplus in Mish operator. Approximate value "
        "of softplus will be used if absolute value of input is greater than "
        ":attr:`threshold`")
        .SetDefault(20.f);
    AddComment(R"DOC(
Mish Activation Operator.

..  math::
    softplus(x) = \begin{cases}
            x, \text{if } x > \text{threshold} \\
            \ln(1 + e^{x}),  \text{otherwise}
          \end{cases}

    out = x * \tanh(softplus(x))

)DOC");
  }
};

class HardSwishOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input of HardSwish operator");
    AddOutput("Out", "Output of HardSwish operator");
    AddAttr<float>("threshold", "The threshold parameter of HardSwish operator")
        .SetDefault(6.0f);
    AddAttr<float>("scale", "The scale parameter of HardSwish operator")
        .SetDefault(6.0f);
    AddAttr<float>("offset", "The offset parameter of HardSwish operator")
        .SetDefault(3.0f);
    AddComment(R"DOC(
HardSwish Activation Operator.

The hard version of swish (https://arxiv.org/pdf/1905.02244.pdf).

$$out = \frac{x * (min(max(0, x+offset), threshold))}{scale}$$

The threshold and scale should be positive. The offset can be either positive or negative.
The default parameters are set according to the above reference.
It is recommended to use the defaults for this activation.

)DOC");
  }
};

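// Shape inference for the double-grad ops: depending on whether the grad
// functor needed X or Out, the outputs (DX, DDOut, DOutNew) inherit dims/LoD
// from the corresponding forward tensor, and the kernel is keyed on DDX.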
template <ActBwdOpFwdDeps kDepValue>
class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
      if (ctx->HasOutput("DX")) {
        ctx->ShareDim("X", "DX");
        ctx->ShareLoD("X", "DX");
      }
      if (ctx->HasOutput("DDOut")) {
        ctx->ShareDim("X", "DDOut");
        ctx->ShareLoD("X", "DDOut");
      }
    }
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
      if (ctx->HasOutput("DOut")) {
        ctx->ShareDim("Out", "DOut");
        ctx->ShareLoD("Out", "DOut");
      }
      if (ctx->HasOutput("DDOut")) {
        ctx->ShareDim("Out", "DDOut");
        ctx->ShareLoD("Out", "DDOut");
      }
      if (ctx->HasOutput("DOutNew")) {
        ctx->ShareDim("Out", "DOutNew");
        ctx->ShareLoD("Out", "DOutNew");
      }
    }
  }

 protected:
  phi::KernelKey GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, "DDX");
  }
};

template <ActBwdOpFwdDeps kDepValue>
class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
      if (ctx->HasOutput("DDOut")) {
        ctx->ShareDim("X", "DDOut");
        ctx->ShareLoD("X", "DDOut");
      }
    }
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
      if (ctx->HasOutput("DDOut")) {
        ctx->ShareDim("Out", "DDOut");
        ctx->ShareLoD("Out", "DDOut");
      }
    }
  }

 protected:
  phi::KernelKey GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, "DDX");
  }
};

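// Same pattern one order higher: the triple-grad outputs (DX, DDOut, D_DOut,
// D_OutNew, D_DDx) inherit dims/LoD from X, Out or DDX, with the kernel again
// keyed on DDX.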
template <ActBwdOpFwdDeps kDepValue>
class ActivationOpTripleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
      if (ctx->HasOutput("DX")) {
        ctx->ShareDim("X", "DX");
        ctx->ShareLoD("X", "DX");
      }
      if (ctx->HasOutput("DDOut")) {
        ctx->ShareDim("X", "DDOut");
        ctx->ShareLoD("X", "DDOut");
      }
    }
    if (static_cast<int>(kDepValue) &
        static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
      if (ctx->HasOutput("D_DOut")) {
        ctx->ShareDim("Out", "D_DOut");
        ctx->ShareLoD("Out", "D_DOut");
      }
      if (ctx->HasOutput("D_OutNew")) {
        ctx->ShareDim("Out", "D_OutNew");
        ctx->ShareLoD("Out", "D_OutNew");
      }
      if (ctx->HasOutput("D_DDx")) {
        ctx->ShareDim("DDX", "D_DDx");
        ctx->ShareLoD("DDX", "D_DDx");
      }
    }
  }

 protected:
  phi::KernelKey GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, "DDX");
  }
};

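// In-place hints: grad kernels may overwrite dOut with dX, double/triple-grad
// kernels may reuse DDX, and ActFwdInplaceInferer lets the forward op write Out
// over X for activations whose gradient permits it (see CanInplaceAct).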
DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer,
                           {framework::GradVarName("Out"),  // dout
                            framework::GradVarName("X")});  // dx
DECLARE_INPLACE_OP_INFERER(ActivationDoubleGradOpInplaceInferer,
                           {"DDX", "DDOut"});
DECLARE_INPLACE_OP_INFERER(ActivationTripleGradOpInplaceInferer,
                           {"DDX", "D_DOut"});

DECLARE_INPLACE_OP_INFERER(ActFwdInplaceInferer, {"X", "Out"});

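// Structural CPU kernels for the activations still registered through fluid in
// this file (only soft_relu): each expansion wraps the element-wise functor
// pair in ActivationKernel / ActivationGradKernel.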
#define DEFINE_ACTIVATION_CPU_KERNEL(op_name, functor, grad_functor)           \
  template <typename T, typename DeviceContext>                                \
  class op_name##Kernel : public ActivationKernel<DeviceContext, functor<T>> { \
  };                                                                           \
                                                                               \
  template <typename T, typename DeviceContext>                                \
  class op_name##GradKernel                                                    \
      : public ActivationGradKernel<DeviceContext, grad_functor<T>> {};

DEFINE_ACTIVATION_CPU_KERNEL(SoftRelu, SoftReluFunctor, SoftReluGradFunctor)

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

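// Registers a forward activation op and its paired "<op>_grad" op: proto maker,
// var-type inference, grad op makers for both static and imperative modes, and
// an in-place inferer only when the grad functor allows it.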
#define REGISTER_ACTIVATION_OP(KERNEL_TYPE, OP_NAME, functor, grad_functor) \
  REGISTER_OPERATOR(                                                        \
      KERNEL_TYPE,                                                          \
      ops::ActivationOp,                                                    \
      ops::OP_NAME##OpMaker,                                                \
      ops::ActivationOpInferVarType,                                        \
      ops::ActivationGradOpMaker<ops::grad_functor<float>::FwdDeps(),       \
                                 paddle::framework::OpDesc>,                \
      ops::ActivationGradOpMaker<ops::grad_functor<float>::FwdDeps(),       \
                                 paddle::imperative::OpBase>,               \
      std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(),      \
                       ops::ActFwdInplaceInferer,                           \
                       void>::type);                                        \
  REGISTER_OPERATOR(KERNEL_TYPE##_grad,                                     \
                    ops::ActivationOpGrad,                                  \
                    ops::ActivationGradOpInplaceInferer);

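// Same as REGISTER_ACTIVATION_OP, plus the composite grad op maker
// (OP_NAME##CompositeGradOpMaker) so the backward can be decomposed into
// primitive ops; used by hard_swish below.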
#define REGISTER_ACTIVATION_OP_WITH_COMP(                              \
    KERNEL_TYPE, OP_NAME, functor, grad_functor)                       \
  REGISTER_OPERATOR(                                                   \
      KERNEL_TYPE,                                                     \
      ops::ActivationOp,                                               \
      ops::OP_NAME##OpMaker,                                           \
      ops::ActivationOpInferVarType,                                   \
      ops::ActivationGradOpMaker<ops::grad_functor<float>::FwdDeps(),  \
                                 paddle::framework::OpDesc>,           \
      ops::ActivationGradOpMaker<ops::grad_functor<float>::FwdDeps(),  \
                                 paddle::imperative::OpBase>,          \
      ops::OP_NAME##CompositeGradOpMaker,                              \
      std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(), \
                       ops::ActFwdInplaceInferer,                      \
                       void>::type);                                   \
  REGISTER_OPERATOR(KERNEL_TYPE##_grad,                                \
                    ops::ActivationOpGrad,                             \
                    ops::ActivationGradOpInplaceInferer);

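// FOR_EACH_ACTIVATION_OP (expected to be defined in activation_op.h) expands
// REGISTER_ACTIVATION_OP once per remaining fluid activation op.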
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);

#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name)                \
  PD_REGISTER_STRUCT_KERNEL(                                             \
      act_type, CPU, ALL_LAYOUT, ops::op_name##Kernel, float, double) {} \
  PD_REGISTER_STRUCT_KERNEL(act_type##_grad,                             \
                            CPU,                                         \
                            ALL_LAYOUT,                                  \
                            ops::op_name##GradKernel,                    \
                            float,                                       \
                            double) {}

REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu)

REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
REGISTER_ACTIVATION_OP_WITH_COMP(hard_swish,
                                 HardSwish,
                                 HardSwishFunctor,
                                 HardSwishGradFunctor);
REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);

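// The op-version checkpoints below record historical behavior changes so that
// programs saved with older operator semantics can be detected and upgraded.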
/* ==========================  register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu)
    .AddCheckpoint(
        R"ROC(fix leaky_relu, bahavior changed when alpha < 0 or alpha > 1)ROC",
        paddle::framework::compatible::OpVersionDesc()
            .BugfixWithBehaviorChanged(
                "leaky_relu calculate formula before checkponit: out = max(x, "
                "alpha * x); after checkpoint: out = x if x > 0 else alpha * "
                "x"));

REGISTER_OP_VERSION(hard_shrink)
    .AddCheckpoint(
        R"ROC(fix hard_shrink, bahavior changed when threshold<0)ROC",
        paddle::framework::compatible::OpVersionDesc()
            .BugfixWithBehaviorChanged(
                "hard_shrink calculate formula before checkponit: out = x * "
                "((x < -threshold) + (x > threshold)); after checkpoint: out = "
                "x * (((x < -threshold) + (x > threshold)) > 0)"));

REGISTER_OP_VERSION(softplus).AddCheckpoint(
    R"ROC(add new attributes [beta] and [threshold], and the formula is changed to "
         " softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\ \\text{For numerical"
         " stability, the implementation reverts to the linear function when: beta * x > threshold.})ROC",
    paddle::framework::compatible::OpVersionDesc()
        .NewAttr("beta", "The beta value of the new formula", 1.0f)
        .NewAttr("threshold", "The threshold value of the new formula", 20.0f));

REGISTER_OP_VERSION(mish).AddCheckpoint(
    R"ROC(add new attributes [use_mkldnn], and when computing softplus the formula is changed as the new veriosn of softplus)ROC",
    paddle::framework::compatible::OpVersionDesc().NewAttr(
        "use_mkldnn",
        "(bool, default false) Only used in mkldnn kernel",
        false));

/* ========================================================================== */