activation_mkldnn_op.cc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace phi {
class DenseTensor;
}  // namespace phi

namespace paddle {
namespace operators {

using dnnl::memory;
using dnnl::primitive;
using dnnl::stream;
using framework::DataLayout;
using framework::Tensor;
using platform::GetMKLDNNFormat;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;

template <typename Functor>
class MKLDNNActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    Functor functor;
    functor(ctx);
  }
};

template <typename Functor>
class MKLDNNActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    Functor functor;
    functor(ctx);
  }
};

template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
                     dnnl::algorithm algorithm) {
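  // Runs a oneDNN eltwise forward primitive of the given algorithm on CPU.
  // When "Out" shares its buffer with "X", the source memory is reused as
  // the destination so the activation is computed in place.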
  PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()),
                    true,
                    paddle::platform::errors::PreconditionNotMet(
                        "Operator DNNL eletwise_forward must use CPUPlace"));
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
  const auto &mkldnn_engine = dev_ctx.GetEngine();

  const auto *x = ctx.Input<Tensor>("X");
  auto *out = ctx.Output<Tensor>("Out");

  bool is_inplaced = x->IsSharedBufferWith(*out);

  platform::ActivationMKLDNNHandler<T> handler(
      algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x);

  auto src_memory_p = handler.AcquireSrcMemory(x);
  std::shared_ptr<dnnl::memory> dst_memory_p = nullptr;
  if (is_inplaced) {
    dst_memory_p = src_memory_p;
    out->mutable_data<T>(ctx.GetPlace());
  } else {
    dst_memory_p = handler.AcquireDstMemory(out);
  }
  auto activation_p = handler.AcquireForwardPrimitive();

  auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
  activation_p->execute(
      astream, {{DNNL_ARG_FROM, *src_memory_p}, {DNNL_ARG_TO, *dst_memory_p}});
  astream.wait();

  out->set_mem_desc(dst_memory_p->get_desc());
}

template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
                  dnnl::algorithm algorithm) {
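  // Computes dX with a oneDNN eltwise backward primitive, using the forward
  // input "X" and the incoming gradient dOut.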
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
  const auto &mkldnn_engine = dev_ctx.GetEngine();

  const auto *x = ctx.Input<Tensor>("X");
  const auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
  auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));

  platform::ActivationMKLDNNHandler<T> handler(
      algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x, dout);

  auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
  auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout);
  auto diff_src_memory_p = handler.AcquireDiffSrcMemory(dx);
  auto activation_backward_p = handler.AcquireBackwardPrimitive();

  auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
  activation_backward_p->execute(astream,
                                 {{DNNL_ARG_SRC, *src_memory_p},
                                  {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
                                  {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
  astream.wait();

  dx->set_mem_desc(diff_src_memory_p->get_desc());
}

template <typename T>
void eltwise_grad_use_out(const framework::ExecutionContext &ctx,
                          dnnl::algorithm algorithm) {
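  // Same as eltwise_grad, but for the *_use_dst_for_bwd algorithms, which
  // compute the gradient from the forward output "Out" instead of "X".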
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
  const auto &mkldnn_engine = dev_ctx.GetEngine();

  const auto *out = ctx.Input<Tensor>("Out");
  const auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
  auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));

  platform::ActivationMKLDNNHandler<T> handler(
      algorithm, ctx, mkldnn_engine, ctx.GetPlace(), out, dout);

  auto dst_memory_p = handler.AcquireBackwardSrcMemory(out);
  auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout);
  auto diff_src_memory_p = handler.AcquireDiffSrcMemory(dx);
  auto activation_backward_p = handler.AcquireBackwardPrimitive();

  auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
  activation_backward_p->execute(astream,
                                 {{DNNL_ARG_DST, *dst_memory_p},
                                  {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
                                  {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
  astream.wait();

  dx->set_mem_desc(diff_src_memory_p->get_desc());
}

template <typename T, dnnl::algorithm algorithm>
struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_forward<T>(ctx, algorithm);
  }
};

template <typename T, dnnl::algorithm algorithm>
struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_grad<T>(ctx, algorithm);
  }
};

template <typename T, dnnl::algorithm algorithm>
struct MKLDNNActivationGradUseOutFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_grad_use_out<T>(ctx, algorithm);
  }
};

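// GELU dispatches to the tanh- or erf-based oneDNN algorithm depending on
// the "approximate" attribute.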
template <typename T>
struct GeluMKLDNNFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const bool approximate = ctx.Attr<bool>("approximate");
    if (approximate) {
      eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
    } else {
      eltwise_forward<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
    }
  }
};

template <typename T>
struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const bool approximate = ctx.Attr<bool>("approximate");
    if (approximate) {
      eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_tanh);
    } else {
      eltwise_grad<T>(ctx, dnnl::algorithm::eltwise_gelu_erf);
    }
  }
};

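// Softplus uses a dedicated oneDNN implementation (see
// softplus_mkldnn_op.h) rather than a plain eltwise primitive.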
template <typename T>
struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    custom_softplus_eltwise_forward<T>(ctx);
  }
};

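// Aliases mapping each activation op to the oneDNN eltwise algorithm used
// for its forward pass.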
template <typename T>
using ReluMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_relu>;

template <typename T>
using Relu6MKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_bounded_relu>;

template <typename T>
using SwishMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_swish>;

template <typename T>
using HardSwishMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_hardswish>;

template <typename T>
using MishMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_mish>;

template <typename T>
using SigmoidMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_logistic>;

template <typename T>
using TanhMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_tanh>;

template <typename T>
using SqrtMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_sqrt>;

template <typename T>
using AbsMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_abs>;

template <typename T>
using EluMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_elu>;

template <typename T>
using ExpMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_exp>;

template <typename T>
using RoundMKLDNNFunctor =
    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_round>;

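// Backward counterparts of the functors above. The *GradUseOutFunctor
// aliases use the *_use_dst_for_bwd algorithms, which derive the gradient
// from the forward output.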
template <typename T>
using ReluMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_relu>;

template <typename T>
using Relu6MKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_bounded_relu>;

template <typename T>
using SwishMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_swish>;

template <typename T>
using HardSwishMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_hardswish>;

template <typename T>
using MishMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_mish>;

template <typename T>
using SigmoidMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_logistic_use_dst_for_bwd>;

template <typename T>
using TanhMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_tanh_use_dst_for_bwd>;

template <typename T>
using SqrtMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_sqrt_use_dst_for_bwd>;

template <typename T>
using AbsMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_abs>;

template <typename T>
using EluMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_elu_use_dst_for_bwd>;

template <typename T>
using ExpMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_exp_use_dst_for_bwd>;

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

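// Registers float and bfloat16 forward and backward MKLDNN kernels for the
// given activation operator.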
#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor)    \
  REGISTER_OP_KERNEL(                                                         \
      act_type,                                                               \
      MKLDNN,                                                                 \
      ::paddle::platform::CPUPlace,                                           \
      ops::MKLDNNActivationKernel<ops::functor<float>>,                       \
      ops::MKLDNNActivationKernel<ops::functor<paddle::platform::bfloat16>>); \
  REGISTER_OP_KERNEL(                                                         \
      act_type##_grad,                                                        \
      MKLDNN,                                                                 \
      ::paddle::platform::CPUPlace,                                           \
      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>,              \
      ops::MKLDNNActivationGradKernel<                                        \
          ops::grad_functor<paddle::platform::bfloat16>>);

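// Forward-only registration (float) for activations without an MKLDNN
// backward kernel.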
#define REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(act_type, functor) \
  REGISTER_OP_KERNEL(act_type,                                        \
                     MKLDNN,                                          \
                     ::paddle::platform::CPUPlace,                    \
                     ops::MKLDNNActivationKernel<ops::functor<float>>);

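// (op name, forward functor, grad functor) triples expanded by the
// registration macro above.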
#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                            \
  __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);                    \
  __macro(elu, EluMKLDNNFunctor, EluMKLDNNGradUseOutFunctor);              \
  __macro(exp, ExpMKLDNNFunctor, ExpMKLDNNGradUseOutFunctor);              \
  __macro(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor);                 \
  __macro(hard_swish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);           \
  __macro(mish, MishMKLDNNFunctor, MishMKLDNNGradFunctor);                 \
  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);                 \
  __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor);              \
  __macro(sigmoid, SigmoidMKLDNNFunctor, SigmoidMKLDNNGradUseOutFunctor);  \
  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradUseOutFunctor);           \
  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor);              \
  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradUseOutFunctor);

FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);

// round eltwise primitive doesn't support BF16, nor does it support grad
REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor);

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(
    softplus,
    MKLDNN,
    paddle::platform::CPUPlace,
    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>,
    ops::MKLDNNActivationKernel<
        ops::SoftplusMKLDNNFunctor<paddle::platform::bfloat16>>);