/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;

namespace {
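// Builds a string key from the operand dims and the eltwise algorithm
// (dims joined with '-', followed by the algorithm id), used to identify
// cached MKL-DNN primitives.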
std::string gethash(const mkldnn::memory::dims &operand_dims,
                    const mkldnn::algorithm algorithm) {
  auto dim2str = [](const mkldnn::memory::dims &operand_dims) {
    std::string dstr = "";
    for (size_t i = 0; i < operand_dims.size(); ++i) {
      dstr += std::to_string(operand_dims[i]) + "-";
    }
    return dstr;
  };
  return dim2str(operand_dims) + std::to_string(algorithm);
}
}  // namespace

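// Forward MKL-DNN activation kernel: verifies that the X tensor carries
// MKL-DNN layout/format metadata, then dispatches to the algorithm-specific
// functor.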
template <typename Functor>
class MKLDNNActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    const auto *x = ctx.Input<Tensor>("X");
    PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN,
                      "Wrong layout set for X tensor");
    PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::format_undef,
                      "Wrong format set for X tensor");

    Functor functor;
    functor(ctx);
  }
};

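// Gradient MKL-DNN activation kernel: verifies the Out@GRAD tensor's MKL-DNN
// metadata and that the op is not in test mode, then dispatches to the
// algorithm-specific functor.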
template <typename Functor>
class MKLDNNActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
    PADDLE_ENFORCE_EQ(diff_y->layout(), DataLayout::kMKLDNN,
                      "Wrong layout set for Input OutGrad tensor");
    PADDLE_ENFORCE_NE(diff_y->format(), MKLDNNMemoryFormat::format_undef,
                      "Wrong format set for Input OutGrad tensor");

    PADDLE_ENFORCE_EQ(
        ctx.Attr<bool>("is_test"), false,
        "is_test attribute should be set to False in training phase.");

    Functor functor;
    functor(ctx);
  }
};

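// Computes Out = f(X) with an MKL-DNN eltwise forward primitive, where f is
// selected by `algorithm` (relu, tanh, sqrt, abs, ...).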
template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
                     mkldnn::algorithm algorithm) {
  PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                 "It must use CPUPlace.");
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();

  const auto *x = ctx.Input<Tensor>("X");
  auto *y = ctx.Output<Tensor>("Out");

  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

  PADDLE_ENFORCE(
      x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
      "Input dim must be 2, 3 or 4");

  auto src_tz = framework::vectorize<int>(x->dims());

  auto src_format = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format();

  bool is_test = ctx.Attr<bool>("is_test");

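  // The handler creates (or retrieves from the device-context cache) the
  // eltwise forward primitive descriptor, keyed by the name of the X input.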
  platform::ActivationMKLDNNHandler<T> handler(
      src_tz, algorithm, alpha, beta, src_format, is_test, dev_ctx,
      ctx.GetPlace(), ctx.op().Input("X"));

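  // Acquire the source/destination memory objects and the eltwise forward
  // primitive from the handler.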
  auto src_memory_p = handler.AcquireSrcMemory(x);
  auto dst_memory_p = handler.AcquireDstMemory(y);
  auto activation_p = handler.AcquireActivation(dst_memory_p, src_memory_p);

  // push primitive to stream and wait until it's executed
  std::vector<primitive> pipeline;
  pipeline.push_back(*activation_p);
  stream(stream::kind::eager).submit(pipeline).wait();

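  // Record MKL-DNN layout and memory format metadata on the output tensor.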
  y->set_layout(DataLayout::kMKLDNN);
  y->set_format(GetMKLDNNFormat(*dst_memory_p));
}

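// Computes X@GRAD from X and Out@GRAD with an MKL-DNN eltwise backward
// primitive for the given `algorithm`.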
template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
                  mkldnn::algorithm algorithm) {
  auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();

  const auto *x = ctx.Input<Tensor>("X");
  const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
  auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));

  const T alpha = ctx.op().HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
  const T beta = ctx.op().HasAttr("beta") ? ctx.Attr<T>("beta") : 0;

  auto diff_dst_tz = framework::vectorize<int>(diff_y->dims());

  // diff_dst and src dims should be the same
  auto src_format =
      diff_dst_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format();

  auto diff_y_format =
      diff_dst_tz.size() == 2 ? MKLDNNMemoryFormat::nc : diff_y->format();

  platform::ActivationMKLDNNHandler<T> handler(
      diff_dst_tz, algorithm, alpha, beta, src_format, diff_y_format, dev_ctx,
      ctx.GetPlace(), ctx.op().Input("X"));

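  // Acquire the forward source, diff_dst and diff_src memory objects, then
  // the eltwise backward primitive.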
  auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
  auto diff_dst_memory_p = handler.AcquireDiffDstMemory(diff_y);
  auto diff_src_memory_p = handler.AcquireDiffSrcMemory(diff_x);
  auto activation_backward_p = handler.AcquireActivationBackward(
      diff_src_memory_p, diff_dst_memory_p, src_memory_p);

  // push primitive to stream and wait until it's executed
  std::vector<primitive> pipeline;
  pipeline.push_back(*activation_backward_p);
  stream(stream::kind::eager).submit(pipeline).wait();

  diff_x->set_layout(DataLayout::kMKLDNN);
  diff_x->set_format(GetMKLDNNFormat(*diff_src_memory_p));
}

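// Functors that bind a specific MKL-DNN eltwise algorithm to the forward and
// backward element-wise routines above.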
template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_forward<T>(ctx, algorithm);
  }
};

template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
  void operator()(const framework::ExecutionContext &ctx) const {
    eltwise_grad<T>(ctx, algorithm);
  }
};

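// Aliases mapping each supported activation to its MKL-DNN eltwise algorithm.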
template <typename T>
using ReluMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;

template <typename T>
using TanhMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_tanh>;

template <typename T>
using SqrtMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_sqrt>;

template <typename T>
using AbsMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_abs>;

template <typename T>
using ReluMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_relu>;

template <typename T>
using TanhMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_tanh>;

template <typename T>
using SqrtMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_sqrt>;

template <typename T>
using AbsMKLDNNGradFunctor =
    MKLDNNActivationGradFunc<T, mkldnn::algorithm::eltwise_abs>;
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

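// Registers the forward and gradient MKL-DNN kernels of one activation op for
// float on CPUPlace.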
#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
  REGISTER_OP_KERNEL(act_type, MKLDNN, ::paddle::platform::CPUPlace,       \
                     ops::MKLDNNActivationKernel<ops::functor<float>>);    \
  REGISTER_OP_KERNEL(                                                      \
      act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,               \
      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);

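// leaky_relu maps onto the relu functors because MKL-DNN's eltwise_relu treats
// the `alpha` attribute as the negative slope.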
#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
  __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);

FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);