/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;

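// Forward activation kernel: checks that the input tensor carries a valid
// MKL-DNN layout and memory format, then delegates to the functor.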
template <typename Functor>
class MKLDNNActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    const auto *x = ctx.Input<Tensor>("X");
    PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN,
                      "Wrong layout set for X tensor");
    PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::undef,
                      "Wrong format set for X tensor");

    Functor functor;
    functor(ctx);
  }
};

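// Backward activation kernel: checks the layout and memory format of the
// incoming Out@GRAD tensor, then delegates to the gradient functor.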
template <typename Functor>
class MKLDNNActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
    PADDLE_ENFORCE_EQ(diff_y->layout(), DataLayout::kMKLDNN,
                      "Wrong layout set for Input OutGrad tensor");
    PADDLE_ENFORCE_NE(diff_y->format(), MKLDNNMemoryFormat::undef,
                      "Wrong format set for Input OutGrad tensor");

    Functor functor;
    functor(ctx);
  }
};

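// Shared forward path for all MKL-DNN eltwise activations: builds an
// ActivationMKLDNNHandler for the requested algorithm, executes the forward
// primitive and annotates the output with its MKL-DNN layout and format.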
template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
                     mkldnn::algorithm algorithm) {
  PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                 "It must use CPUPlace.");
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();

  const auto *x = ctx.Input<Tensor>("X");
  auto *y = ctx.Output<Tensor>("Out");

  T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
  T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

  // paddle uses beta but mkldnn uses alpha for swish
  if (algorithm == mkldnn::algorithm::eltwise_swish) {
    std::swap(alpha, beta);
  }

  PADDLE_ENFORCE(
      x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
      "Input dim must be with 2, 3 or 4");

  auto src_tz = framework::vectorize<int64_t>(x->dims());

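  // 2-D inputs use the plain NC format; higher-rank inputs keep the format
  // recorded on the tensor.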
  auto src_format = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format();

  platform::ActivationMKLDNNHandler<T> handler(
      src_tz, algorithm, alpha, beta, src_format, dev_ctx, ctx.GetPlace(),
      ctx.InputName("X"));

  auto src_memory_p = handler.AcquireSrcMemory(x);
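  // Run in place when X and Out share a buffer; otherwise acquire a
  // separate destination memory.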
  auto dst_memory_p =
      x->IsSharedBufferWith(*y) ? src_memory_p : handler.AcquireDstMemory(y);
  auto activation_p = handler.AcquireForwardPrimitive();

  mkldnn::stream astream(dev_ctx.GetEngine());
  activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
                                  {MKLDNN_ARG_TO, *dst_memory_p}});
  astream.wait();

  y->set_layout(DataLayout::kMKLDNN);
  y->set_format(GetMKLDNNFormat(*dst_memory_p));
}

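// Shared backward path: rebuilds the handler state from X and Out@GRAD,
// executes the backward eltwise primitive and writes the result into X@GRAD.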
template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
                  mkldnn::algorithm algorithm) {
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();

  const auto *x = ctx.Input<Tensor>("X");
  const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
  auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));

  T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
  T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

  // paddle uses beta but mkldnn uses alpha for swish
  if (algorithm == mkldnn::algorithm::eltwise_swish) {
    std::swap(alpha, beta);
  }

  auto diff_dst_tz = framework::vectorize<int64_t>(diff_y->dims());

  // diff_dst and src dims should be the same
  auto src_format =
      diff_dst_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format();

  auto diff_y_format =
      diff_dst_tz.size() == 2 ? MKLDNNMemoryFormat::nc : diff_y->format();

  platform::ActivationMKLDNNHandler<T> handler(
      diff_dst_tz, algorithm, alpha, beta, src_format, diff_y_format, dev_ctx,
      ctx.GetPlace(), ctx.InputName("X"));

  auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
  auto diff_dst_memory_p = handler.AcquireDiffDstMemory(diff_y);
  auto diff_src_memory_p = handler.AcquireDiffSrcMemory(diff_x);
  auto activation_backward_p = handler.AcquireBackwardPrimitive();

  mkldnn::stream astream(dev_ctx.GetEngine());
  activation_backward_p->execute(astream,
                                 {{MKLDNN_ARG_SRC, *src_memory_p},
                                  {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
                                  {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}});
  astream.wait();

  diff_x->set_layout(DataLayout::kMKLDNN);
  diff_x->set_format(GetMKLDNNFormat(*diff_src_memory_p));
}

template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_forward<T>(ctx, algorithm);
  }
};

template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_grad<T>(ctx, algorithm);
  }
};

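// GELU gets dedicated functors because the "approximate" attribute selects
// between the tanh-based and the erf-based MKL-DNN algorithms.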
template <typename T>
struct GeluMKLDNNFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const bool approximate = ctx.Attr<bool>("approximate");
    if (approximate) {
      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
    } else {
      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
    }
  }
};

template <typename T>
struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    const bool approximate = ctx.Attr<bool>("approximate");
    if (approximate) {
      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
    } else {
      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
    }
  }
};

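// The remaining activations map one-to-one onto MKL-DNN eltwise algorithms,
// so plain aliases of the generic functors are sufficient.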
template <typename T>
using ReluMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;

template <typename T>
using SwishMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_swish>;

template <typename T>
using TanhMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_tanh>;

template <typename T>
using SqrtMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_sqrt>;

template <typename T>
using AbsMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_abs>;

template <typename T>
using ReluMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_relu>;

template <typename T>
using SwishMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_swish>;

template <typename T>
using TanhMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_tanh>;

template <typename T>
using SqrtMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_sqrt>;

template <typename T>
using AbsMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_abs>;
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

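// Registers the forward and backward MKL-DNN kernels of an activation
// operator for float on CPUPlace.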
#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
  REGISTER_OP_KERNEL(act_type, MKLDNN, ::paddle::platform::CPUPlace,       \
                     ops::MKLDNNActivationKernel<ops::functor<float>>);    \
  REGISTER_OP_KERNEL(                                                      \
      act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,               \
      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);

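// Activation operators that get MKL-DNN kernels, each paired with its
// forward and backward functor; leaky_relu reuses the relu functors.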
#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
  __macro(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor);       \
  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor);    \
  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
  __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);

FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);