activation_op.h 12.5 KB
Newer Older
Q
qijun 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

22 23 24
// Forward kernel shared by the activation operators.
//
// Place   - tag type used to obtain the Eigen device from the context.
// Functor - supplies the element type (Functor::ELEMENT_TYPE), the list of
//           float attributes to load (GetAttrs), and the forward expression
//           (operator()(device, x, y)).
template <typename Place, typename Functor>
class ActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;

  // Reads input "X", allocates output "Y" at the context's place, and lets
  // the functor fill the flattened output from the flattened input.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* X = context.Input<framework::Tensor>("X");
    auto* Y = context.Output<framework::Tensor>("Y");
    Y->mutable_data<T>(context.GetPlace());

    auto x = framework::EigenVector<T>::Flatten(*X);
    auto y = framework::EigenVector<T>::Flatten(*Y);
    auto place = context.GetEigenDevice<Place>();
    Functor functor;

    // Copy every float attribute the functor asks for (name -> member
    // pointer) from the operator's attributes before evaluating it.
    auto attrs = functor.GetAttrs();
    for (auto& attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    functor(place, x, y);
  }
};

46 47 48
// Backward kernel shared by the activation operators.
//
// Place   - tag type used to obtain the Eigen device from the context.
// Functor - supplies the element type (Functor::ELEMENT_TYPE), the list of
//           float attributes to load (GetAttrs), and the gradient expression
//           (operator()(device, x, y, dy, dx)).
template <typename Place, typename Functor>
class ActivationGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;

  // Reads inputs "X", "Y" and the gradient variable of "Y", allocates the
  // gradient variable of "X" at the context's place, and lets the functor
  // fill it from the flattened tensors.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* X = context.Input<framework::Tensor>("X");
    auto* Y = context.Input<framework::Tensor>("Y");
    auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
    auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
    dX->mutable_data<T>(context.GetPlace());

    auto dy = framework::EigenVector<T>::Flatten(*dY);
    auto x = framework::EigenVector<T>::Flatten(*X);
    auto y = framework::EigenVector<T>::Flatten(*Y);
    auto dx = framework::EigenVector<T>::Flatten(*dX);
    auto place = context.GetEigenDevice<Place>();
    Functor functor;

    // Copy every float attribute the functor asks for (name -> member
    // pointer) from the operator's attributes before evaluating it.
    auto attrs = functor.GetAttrs();
    for (auto& attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    functor(place, x, y, dy, dx);
  }
};

72 73 74 75 76 77 78 79 80
// Common base of the activation functors in this file.
//
// It publishes the element type and provides the default "no attributes"
// implementation of GetAttrs(). Functors that carry float attributes hide
// GetAttrs() with their own version.
template <typename T>
struct BaseActivationFunctor {
  // Element type; the kernels read it as Functor::ELEMENT_TYPE.
  using ELEMENT_TYPE = T;

  // A list of (attribute name, pointer to the float that receives its value)
  // entries. Despite the name, this is a vector of pairs, not a single pair.
  using AttrPair = std::vector<std::pair<const char*, float*>>;

  // By default a functor has no attributes to load.
  AttrPair GetAttrs() { return {}; }
};

81
// sigmoid(x) = 1 / (1 + exp(-x))
Q
qijun 已提交
82
template <typename T>
83
struct SigmoidFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
84
  template <typename Device, typename X, typename Y>
85
  void operator()(Device d, X x, Y y) const {
86
    y.device(d) = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
Q
qijun 已提交
87 88 89
  }
};

90
template <typename T>
91
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
92
  template <typename Device, typename X, typename Y, typename dY, typename dX>
93
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
94
    dx.device(d) = dy * y * (static_cast<T>(1) - y);
Q
qijun 已提交
95 96 97
  }
};

Q
qijun 已提交
98
// exp(x) = e^x
99 100
template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
101
  template <typename Device, typename X, typename Y>
102
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
103 104 105 106
    y.device(d) = x.exp();
  }
};

107 108
// Gradient of exp expressed through the forward output:
//   dx = dy * y
// The forward input `x` is accepted for a uniform signature but not used.
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    dx.device(d) = dy * y;
  }
};

Q
qijun 已提交
115
// relu(x) = max(x, 0)
Q
qijun 已提交
116
template <typename T>
117
struct ReluFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
118
  template <typename Device, typename X, typename Y>
119
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
120 121 122
    y.device(d) = x.cwiseMax(static_cast<T>(0));
  }
};
Q
qijun 已提交
123

Q
qijun 已提交
124
template <typename T>
125
struct ReluGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
126
  template <typename Device, typename X, typename Y, typename dY, typename dX>
127
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
Q
qijun 已提交
128 129 130
    dx.device(d) = dy * (x > static_cast<T>(0)).template cast<T>();
  }
};
Q
qijun 已提交
131

132
// tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
133 134
template <typename T>
struct TanhFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
135
  template <typename Device, typename X, typename Y>
136
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
137 138 139 140 141
    y.device(d) = x.tanh();
  }
};

template <typename T>
142
struct TanhGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
143
  template <typename Device, typename X, typename Y, typename dY, typename dX>
144
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
145
    dx.device(d) = dy * (static_cast<T>(1) - y * y);
Q
qijun 已提交
146 147 148
  }
};

Q
qijun 已提交
149
// sqrt(x) = x^(1/2)
150 151
template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
152
  template <typename Device, typename X, typename Y>
153
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
154 155 156 157 158
    y.device(d) = x.sqrt();
  }
};

template <typename T>
159
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
160
  template <typename Device, typename X, typename Y, typename dY, typename dX>
161
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
162
    const Y y_conj = Eigen::numext::conj(y);
Q
qijun 已提交
163 164 165 166
    dx.device(d) = static_cast<T>(0.5) * dy / y_conj;
  }
};

Q
qijun 已提交
167
// abs(x) = |x|
168 169
template <typename T>
struct AbsFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
170
  template <typename Device, typename X, typename Y>
171
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
172 173 174 175
    y.device(d) = x.abs();
  }
};

176 177
// Gradient of abs:
//   dx = dy * sign(x)
// The forward output `y` is accepted for a uniform signature but not used.
template <typename T>
struct AbsGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    dx.device(d) = dy * x.sign();
  }
};

Q
qijun 已提交
184 185
// reciprocal(x) = 1 / x
template <typename T>
186
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
187
  template <typename Device, typename X, typename Y>
188
  void operator()(Device d, X x, Y y) const {
189
    y.device(d) = static_cast<T>(1) / x;
Q
qijun 已提交
190 191 192
  }
};

193
template <typename T>
194
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
195
  template <typename Device, typename X, typename Y, typename dY, typename dX>
196
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
197
    dx.device(d) = dy * static_cast<T>(-1) * y * y;
Q
qijun 已提交
198 199 200 201
  }
};

// log(x) = natural logarithm of x
202 203
template <typename T>
struct LogFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
204
  template <typename Device, typename X, typename Y>
205
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
206 207 208 209
    y.device(d) = x.log();
  }
};

210
template <typename T>
211
struct LogGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
212
  template <typename Device, typename X, typename Y, typename dY, typename dX>
213
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
214
    dx.device(d) = dy * (static_cast<T>(1) / x);
Q
qijun 已提交
215 216 217 218
  }
};

// square(x) = x^2
219 220
template <typename T>
struct SquareFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
221
  template <typename Device, typename X, typename Y>
222
  void operator()(Device d, X x, Y y) const {
Q
qijun 已提交
223 224
    y.device(d) = x.square();
  }
225
};
Q
qijun 已提交
226

227
template <typename T>
228
struct SquareGradFunctor : public BaseActivationFunctor<T> {
Q
qijun 已提交
229
  template <typename Device, typename X, typename Y, typename dY, typename dX>
230
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
231 232 233 234
    dx.device(d) = dy * static_cast<T>(2) * x;
  }
};

235 236 237 238 239 240 241 242 243 244
// brelu(x) = min(max(x, t_min), t_max)
//
// Forward functor that clamps `x` into [t_min, t_max]. Both bounds are left
// uninitialized here; the caller is expected to fill them through the
// pointers returned by GetAttrs() before invoking operator().
template <typename T>
struct BReluFunctor : public BaseActivationFunctor<T> {
  float t_min;
  float t_max;

  // NOTE: this deliberately hides `BaseActivationFunctor<T>::GetAttrs`
  // (plain name hiding rather than a virtual override) for speed.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"t_min", &t_min}, {"t_max", &t_max}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    y.device(d) = x.cwiseMax(t_min).cwiseMin(t_max);
  }
};

252 253 254 255 256 257 258 259 260 261
// Gradient of brelu:
//   dx = dy  where t_min < x < t_max (both comparisons strict), else 0
// `t_min` and `t_max` are left uninitialized here; the caller is expected to
// fill them through the pointers returned by GetAttrs(). The forward output
// `y` is accepted for a uniform signature but not used.
template <typename T>
struct BReluGradFunctor : public BaseActivationFunctor<T> {
  float t_min;
  float t_max;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose both bounds.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"t_min", &t_min}, {"t_max", &t_max}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    // The product of the two comparison results acts as a logical AND.
    dx.device(d) = dy * ((x > t_min) * (x < t_max)).template cast<T>();
  }
};

265 266
// softsign(x) = x / (1 + |x|)
template <typename T>
267
struct SoftsignFunctor : public BaseActivationFunctor<T> {
268 269 270 271 272 273 274 275 276
  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) {
    y.device(d) = x / (static_cast<T>(1) + x.abs());
  }
};

// d(softsign(x))/dx = 1 / (1 + |x|)^2
// Taken from https://en.wikipedia.org/wiki/Activation_function
template <typename T>
277
struct SoftsignGradFunctor : public BaseActivationFunctor<T> {
278 279 280 281 282 283 284
  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) {
    dx.device(d) =
        dy * (static_cast<T>(1) / (static_cast<T>(1) + x.abs()).square());
  }
};

285 286 287 288 289 290
// soft_relu(x) = log(1 + exp(clip(x, -threshold, threshold)))
//
// Forward functor. `threshold` is left uninitialized here; the caller is
// expected to fill it through the pointer returned by GetAttrs().
template <typename T>
struct SoftReluFunctor : public BaseActivationFunctor<T> {
  float threshold;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose `threshold`.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    // Clamp the input into [-threshold, threshold] before exponentiating.
    auto temp = x.cwiseMax(-threshold).cwiseMin(threshold);
    y.device(d) = (static_cast<T>(1) + temp.exp()).log();
  }
};

299 300 301 302 303 304 305 306
// Gradient of soft_relu expressed through the forward output:
//   dx = dy * (1 - exp(-y))  where -threshold < x < threshold (strict),
//   dx = 0                   elsewhere
// `threshold` is left uninitialized here; the caller is expected to fill it
// through the pointer returned by GetAttrs().
template <typename T>
struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
  float threshold;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose `threshold`.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    // Mask of 1s inside the open interval and 0s outside, cast to T; the
    // product of the two comparison results acts as a logical AND.
    auto temp = ((x > -threshold) * (x < threshold)).template cast<T>().eval();
    dx.device(d) = dy * (static_cast<T>(1) - (-y).exp()) * temp;
  }
};

312 313 314 315 316 317 318 319 320
// pow(x) = x^factor
//
// Forward functor. `factor` is left uninitialized here; the caller is
// expected to fill it through the pointer returned by GetAttrs().
template <typename T>
struct PowFunctor : public BaseActivationFunctor<T> {
  float factor;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose `factor`.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    y.device(d) = x.pow(factor);
  }
};

324 325 326 327 328 329 330 331 332
// Gradient of pow:
//   dx = dy * factor * x^(factor - 1)
// `factor` is left uninitialized here; the caller is expected to fill it
// through the pointer returned by GetAttrs(). The forward output `y` is
// accepted for a uniform signature but not used.
template <typename T>
struct PowGradFunctor : public BaseActivationFunctor<T> {
  float factor;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose `factor`.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    dx.device(d) = dy * factor * x.pow(factor - static_cast<T>(1));
  }
};

336 337 338 339 340 341 342
// stanh(x) = scale_b * tanh(scale_a * x)
//
// Forward functor. Both scales are left uninitialized here; the caller is
// expected to fill them through the pointers returned by GetAttrs().
template <typename T>
struct STanhFunctor : public BaseActivationFunctor<T> {
  float scale_a;
  float scale_b;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose both scales.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    y.device(d) = scale_b * (scale_a * x).tanh();
  }
};

350 351 352 353 354 355 356
// Gradient of stanh:
//   dx = dy * scale_a * scale_b * (1 - tanh(scale_a * x)^2)
// Both scales are left uninitialized here; the caller is expected to fill
// them through the pointers returned by GetAttrs(). The forward output `y`
// is accepted for a uniform signature but not used.
template <typename T>
struct STanhGradFunctor : public BaseActivationFunctor<T> {
  float scale_a;
  float scale_b;

  // Hides BaseActivationFunctor<T>::GetAttrs to expose both scales.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"scale_a", &scale_a}, {"scale_b", &scale_b}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    // tanh(scale_a * x) squared.
    auto temp = (scale_a * x).tanh() * (scale_a * x).tanh();
    dx.device(d) = dy * scale_a * scale_b * (static_cast<T>(1) - temp);
  }
};

Q
qijun 已提交
365 366
}  // namespace operators
}  // namespace paddle
367 368 369 370 371 372 373 374 375 376 377 378 379 380

// Invokes `__macro(identifier, ForwardFunctor, GradFunctor)` once for every
// activation listed below, in this order. Invocations are separated by `;`
// and the last one has no trailing `;`, so the user of this macro supplies it.
// Every line except the last must end with a line-continuation backslash.
#define FOR_EACH_KERNEL_FUNCTOR(__macro)                         \
  __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor);          \
  __macro(exp, ExpFunctor, ExpGradFunctor);                      \
  __macro(relu, ReluFunctor, ReluGradFunctor);                   \
  __macro(tanh, TanhFunctor, TanhGradFunctor);                   \
  __macro(sqrt, SqrtFunctor, SqrtGradFunctor);                   \
  __macro(abs, AbsFunctor, AbsGradFunctor);                      \
  __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
  __macro(log, LogFunctor, LogGradFunctor);                      \
  __macro(square, SquareFunctor, SquareGradFunctor);             \
  __macro(brelu, BReluFunctor, BReluGradFunctor);                \
  __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor);      \
  __macro(pow, PowFunctor, PowGradFunctor);                      \
  __macro(stanh, STanhFunctor, STanhGradFunctor);                \
  __macro(softsign, SoftsignFunctor, SoftsignGradFunctor)