/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "ActivationFunction.h"

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Logging.h"

#ifdef PADDLE_USE_MKLDNN
#include "MKLDNNActivation.h"
#endif

namespace paddle {

static ClassRegistrar<ActivationFunction> gActivationRegistrar;
/**
 * @def ACTIVATION_CLASS_NAME
 * @brief Macro for getting derived activation class name
 * @note ACTIVATION_CLASS_NAME(softmax) softmax_;
 * means softmaxActivation softmax_;
 */
#define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation
/**
 * @def BEGIN_DEFINE_ACTIVATION
 * @brief Macro for defining a derived activation class
 */
#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                             \
  class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
  private:                                                                   \
    static const std::string name;                                           \
                                                                             \
  public:                                                                    \
    const std::string& getName() const { return name; }
/**
 * @def END_DEFINE_ACTIVATION
 * @brief Macro for registering a derived activation class
 */
#define END_DEFINE_ACTIVATION(ACTIVATION_NAME)                     \
  }                                                                \
  ;                                                                \
  const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
      #ACTIVATION_NAME;                                            \
  static InitFunction __reg_activation__##ACTIVATION_NAME([] {     \
    gActivationRegistrar                                           \
        .registerClass<ACTIVATION_CLASS_NAME(ACTIVATION_NAME)>(    \
            #ACTIVATION_NAME);                                     \
  });

/**
 * @brief The IdentityActivation class
 *
 * Do nothing when forward/backward.
 */
class IdentityActivation : public ActivationFunction {
public:
  static const std::string name;
  Error __must_check forward(Argument& act) {
    (void)act;
    return Error();
  }
  Error __must_check backward(Argument& act) {
    (void)act;
    return Error();
  }
  const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
static InitFunction __reg_activation__identity([] {
  gActivationRegistrar.registerClass<IdentityActivation>("");
  gActivationRegistrar.registerClass<IdentityActivation>("linear");
});

/**
 * @brief Sigmoid Activation
 * \f[
 * f(z) = \frac{1}{1+exp(-z)}
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(sigmoid)
Error __must_check forward(Argument& act) {
  act.value->sigmoid(*act.value);
  return Error();
}
Error __must_check backward(Argument& act) {
  act.grad->sigmoidDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(sigmoid)

/**
 * @brief Softmax Activation
 * \f[
 * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(softmax)
private:
MatrixPtr sftMaxSum_;
MatrixPtr sftMaxDot_;

public:
Error __must_check forward(Argument& act) {
  act.value->softmax(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  MatrixPtr outputV = act.value;
  MatrixPtr outputG = act.grad;

  if (outputG->useGpu()) {
    outputG->softmaxBackward(*outputV);
  } else {
    SetDevice device(act.deviceId);
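    // CPU path: accumulate the row-wise sum of (output grad * output) into
    // sftMaxSum_, then softmaxDerivative() applies
    //   dL/dz_i = y_i * (dL/dy_i - sum_k dL/dy_k * y_k)
    // to each row of the gradient.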
    Matrix::resizeOrCreate(sftMaxDot_,
                           outputG->getHeight(),
                           outputG->getWidth(),
                           /* trans */ false,
                           useGpu(act.deviceId));
    Matrix::resizeOrCreate(sftMaxSum_,
                           outputG->getHeight(),
                           1,
                           /* trans */ false,
                           useGpu(act.deviceId));

    sftMaxDot_->dotMul(*outputG, *outputV);
    sftMaxSum_->colMerge(*sftMaxDot_);

    act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
  }
  return Error();
}
END_DEFINE_ACTIVATION(softmax)

/**
 * @brief Sequence_softmax Activation
 * @note Softmax on all frames of one sequence.
 * The width of each frame must be one.
 */
BEGIN_DEFINE_ACTIVATION(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
Error __must_check forward(Argument& act) {
  if (act.value->getWidth() != 1UL) {
    return Error(
        "Input width for each timestep of sequence softmax should be 1");
  }

  if (!argument_.value) {
    argument_.value = Matrix::create(nullptr,
                                     /* height= */ 1,
                                     1,
                                     /* trans= */ false,
                                     useGpu(act.deviceId));
    argument_.grad = Matrix::create(nullptr,
                                    /* height= */ 1,
                                    1,
                                    /* trans= */ false,
                                    useGpu(act.deviceId));
  }

  auto starts =
      act.hasSubseq()
          ? act.subSequenceStartPositions->getVector(useGpu(act.deviceId))
          : act.sequenceStartPositions->getVector(useGpu(act.deviceId));
  act.value->sequenceSoftmax(*act.value, *starts);
  return Error();
}

Error __must_check backward(Argument& act) {
  if (act.value->getWidth() != 1UL) {
    return Error(
        "Input width for each timestep of sequence softmax should be 1");
  }

  size_t numSequences =
      act.hasSubseq() ? act.getNumSubSequences() : act.getNumSequences();
  const int* starts = act.getCpuStartPositions();
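  // argument_.value / argument_.grad were created with a null data pointer in
  // forward(); setData() below re-points them at each sequence's slice so the
  // plain softmax backward can be reused per sequence without copying.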

  for (size_t i = 0; i < numSequences; ++i) {
    // TODO(Dangqingqing) optimization for GPU
    size_t offset = starts[i];
    size_t size = starts[i + 1] - starts[i];
    argument_.value->setData(act.value->getData() + offset, 1UL, size);
    argument_.grad->setData(act.grad->getData() + offset, 1UL, size);

    Error err = softmax_.backward(argument_);
    if (!err.isOK()) return err;
  }
  return Error();
}
END_DEFINE_ACTIVATION(sequence_softmax)

/**
 * @brief SoftSign Activation.
 * \f[
 * f(z) = \frac{z}{1 + |z|}
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(softsign)
private:
MatrixPtr denominator_;

Error __must_check forward(Argument& act) {
  size_t height = act.value->getHeight();
  size_t width = act.value->getWidth();
  Matrix::resizeOrCreate(
      denominator_, height, width, false, useGpu(act.deviceId));
  denominator_->assign(*act.value);
  denominator_->abs2();
  denominator_->add(1.);

  act.value->dotDiv(*act.value, *denominator_);
  return Error();
}

Error __must_check backward(Argument& act) {
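  // denominator_ still holds (1 + |z|) from forward(); square and invert it to
  // get f'(z) = 1 / (1 + |z|)^2, then scale the incoming gradient by it.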
  denominator_->square2();
  denominator_->scalarDiv(*denominator_, 1.);
  act.grad->dotMul(*act.grad, *denominator_);
  return Error();
}
END_DEFINE_ACTIVATION(softsign)

/**
 * @brief Relu Activation.
 * forward. y = max(0, z)
 *
 * derivative of relu is:
 *
 *    1 if z > 0
 *
 *    0 otherwise.
 */
BEGIN_DEFINE_ACTIVATION(relu)
Error __must_check forward(Argument& act) {
  act.value->relu(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->reluDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(relu)

/**
 * @brief BRelu Activation.
 *
 * forward. y = min(24, max(0, z))
 *
 * derivative of brelu is:
 *
 *    1 if 0 < z < 24
 *
 *    0 otherwise.
 *
 * TODO(yuyang18): Remove magic number 24 or make it configurable.
 */
BEGIN_DEFINE_ACTIVATION(brelu)
Error __must_check forward(Argument& act) {
  act.value->brelu(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->breluDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(brelu)

/**
 * @brief Tanh Activation.
 * \f[
 * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(tanh)
Error __must_check forward(Argument& act) {
  act.value->tanh(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->tanhDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(tanh)

/**
 * @brief Scaled Tanh Activation
 * \f[
 * f(z) = 1.7159 * tanh(2/3*z)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(stanh)
private:
real a, b;

public:
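// Constants follow LeCun's recommended scaled tanh:
// f(z) = 1.7159 * tanh((2 / 3) * z).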
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
Error __must_check forward(Argument& act) {
  act.value->scaledTanh(*act.value, a, b);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->scaledTanhDerivative(*act.value, a, b);
  return Error();
}
END_DEFINE_ACTIVATION(stanh)

/**
 * @brief Soft Relu Activation.
 * \f[
 * f(z) = ln(1+e^z)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(softrelu)
Error __must_check forward(Argument& act) {
  act.value->softrelu(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->softreluDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(softrelu)

/**
 * @brief Abs Activation.
 * Forward: f(z) = abs(z)
 *
 * Derivative:
 *
 *     1   if z>0
 *
 *    -1   if z<0
 *
 *     0   if z=0
 */
BEGIN_DEFINE_ACTIVATION(abs)
Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
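  // Keep a copy of the input in act.in: abs'(z) depends on the sign of z,
  // which is lost once act.value is overwritten with |z| below.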
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
                         useGpu(act.deviceId));

  act.in->copyFrom(*act.value);
  act.value->abs2(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->absDerivative(*act.in);
  return Error();
}
END_DEFINE_ACTIVATION(abs)

/**
 * @brief Square Activation.
 * \f[
 * f(z) = z^2.
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(square)
Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
                         useGpu(act.deviceId));

  act.in->copyFrom(*act.value);
  act.value->square2(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
  act.grad->squareDerivative(*act.in);
  return Error();
}
END_DEFINE_ACTIVATION(square)

/**
 * @brief Exponential Activation.
 * \f[
 * f(z) = e^z
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(exponential)
Error __must_check forward(Argument& act) {
  act.value->exp2(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
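  // d/dz e^z = e^z, i.e. the activation output itself, so the derivative is
  // taken directly from act.value.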
  act.grad->expDerivative(*act.value);
  return Error();
}
END_DEFINE_ACTIVATION(exponential)

/**
 * @brief Reciprocal Activation.
 * \f[
 * f(z) = 1/z
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(reciprocal)
Error __must_check forward(Argument& act) {
  act.value->reciprocal2();
  return Error();
}

Error __must_check backward(Argument& act) {
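  // d/dz (1/z) = -1/z^2 = -f(z)^2, so square the output, multiply, and negate.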
  act.grad->dotMulSquare(*act.value);
  act.grad->neg();
  return Error();
}
END_DEFINE_ACTIVATION(reciprocal)

/**
 * @brief Square Root Activation.
 * \f[
 * f(z) = sqrt(z)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(sqrt)
Error __must_check forward(Argument& act) {
  act.value->sqrt2();
  return Error();
}

Error __must_check backward(Argument& act) {
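  // d/dz sqrt(z) = 1 / (2 * sqrt(z)); reuse the activation output f(z) = sqrt(z).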
  act.grad->dotDiv(*act.grad, *act.value);
  act.grad->mulScalar(0.5);
  return Error();
}
END_DEFINE_ACTIVATION(sqrt)

/**
 * @brief Logarithm Activation.
 * \f[
 * f(z) = log(z)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(log)
Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
                         act.value->getWidth(),
                         /* trans */ false,
                         useGpu(act.deviceId));

  act.in->copyFrom(*act.value);
  act.value->log2(*act.value);
  return Error();
}

Error __must_check backward(Argument& act) {
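  // d/dz log(z) = 1/z, so divide the incoming gradient by the saved input act.in.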
  act.grad->dotDiv(*act.grad, *act.in);
  return Error();
}
END_DEFINE_ACTIVATION(log)

ActivationFunction* ActivationFunction::create(const std::string& type) {
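  // Activation names prefixed with "mkldnn_" are dispatched to the MKL-DNN
  // implementations when PaddlePaddle is built with PADDLE_USE_MKLDNN.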
#ifdef PADDLE_USE_MKLDNN
  if (!type.empty() && type.compare(0, 7, "mkldnn_") == 0) {
    return MKLDNNActivation::create(type);
  }
#endif

  return gActivationRegistrar.createByType(type);
}

std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
  std::vector<std::string> types;
  gActivationRegistrar.forEachType(
      [&](const std::string& type) { types.push_back(type); });
  return types;
}

}  // namespace paddle