/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

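// <algorithm> and <cmath> are used only by the illustrative loss sketches
// that accompany the class declarations below.
#include <algorithm>
#include <cmath>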
#include <memory>
#include <vector>
#include "Layer.h"

namespace paddle {

/**
 * Base class for a particular type of cost layer.
 * This type of cost should have one data layer, one label layer
 * and an optional weight layer as input.
 * The derived class should implement forwardImp() and backwardImp(),
 * which calculate the cost for the data and label. The weight is
 * automatically handled by the base class.
 */
class CostLayer : public Layer {
 public:
  explicit CostLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getLabelLayer() { return inputLayers_[1]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  virtual void forwardImp(Matrix& outputValue,
                          Argument& label,
                          Matrix& cost) = 0;

  virtual void backwardImp(Matrix& outputValue,
                           Argument& label,
                           Matrix& outputGrad) = 0;

 protected:
  LayerPtr weightLayer_;
  real coeff_;
};

/**
 * The cross-entropy loss for multi-class classification tasks.
 * The loss function is:
 *
 * \f[
 * L = - \sum_{k}{t_{k} * log(P(y=k))}
 * \f]
 */
class MultiClassCrossEntropy : public CostLayer {
 public:
  explicit MultiClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};
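
// A minimal illustrative sketch (not part of the original API): the
// cross-entropy above for one sample with a one-hot label, assuming
// `probs` already holds softmax probabilities P(y = k).
inline real multiClassCrossEntropyExample(const std::vector<real>& probs,
                                          size_t label) {
  const real kEps = 1e-10;  // guard against log(0)
  return -std::log(std::max(probs[label], kEps));
}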

/**
 * The cross-entropy with self-normalization for multi-class classification.
 *
 * The loss function is:
 * \f[
 * L = \sum_{i}[-log(P(x_{i})) + alpha * (log(Z(x_{i})))^2]
 * \f]
 *
 * \f$Z(x)\f$ is the softmax normalizer.
 *
 * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
 *     Richard Schwartz, and John Makhoul. Fast and robust neural
 *     network joint models for statistical machine translation.
 *     In Proceedings of the ACL 2014 Conference.
 */
class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
 public:
  explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

 protected:
  MatrixPtr sftMaxSum_;
  MatrixPtr sumInv_;
};
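
// A minimal illustrative sketch (not part of the original API): the
// per-sample self-normalized cost above computed from raw scores; the
// names `logits` and `alpha` are placeholders, not Paddle identifiers.
inline real selfNormCrossEntropyExample(const std::vector<real>& logits,
                                        size_t label,
                                        real alpha) {
  real z = 0;
  for (real v : logits) z += std::exp(v);
  real logZ = std::log(z);
  // -log(P(x)) = log(Z) - logits[label]; the penalty drives Z towards 1.
  return (logZ - logits[label]) + alpha * logZ * logZ;
}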

/**
 * The cross-entropy for soft binary-class targets.
 * \f[
 * L = \sum_i (\sum_j -y_j(i)*log(x_j(i)) - (1-y_j(i))*log(1-x_j(i)))
 * \f]
 */
class SoftBinaryClassCrossEntropy : public CostLayer {
 public:
  explicit SoftBinaryClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

 protected:
  MatrixPtr targetPerDim_;
};
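
// A minimal illustrative sketch (not part of the original API): the inner
// sum of the soft binary cross-entropy above for one sample, where `y`
// holds soft targets in [0, 1] and `x` holds predicted probabilities.
inline real softBinaryCrossEntropyExample(const std::vector<real>& x,
                                          const std::vector<real>& y) {
  const real kEps = 1e-10;  // guard against log(0)
  real loss = 0;
  for (size_t j = 0; j < x.size(); ++j) {
    loss += -y[j] * std::log(std::max(x[j], kEps)) -
            (1 - y[j]) * std::log(std::max(1 - x[j], kEps));
  }
  return loss;
}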

/**
 * This cost layer computes the Euclidean (L2) loss for real-valued
 * regression tasks.
 * \f[
 * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
 * \f]
 */
class SumOfSquaresCostLayer : public CostLayer {
 public:
  explicit SumOfSquaresCostLayer(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};
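
// A minimal illustrative sketch (not part of the original API): the squared
// L2 distance between one prediction and its target, i.e. one term of the
// sum in the formula above.
inline real sumOfSquaresExample(const std::vector<real>& pred,
                                const std::vector<real>& target) {
  real loss = 0;
  for (size_t i = 0; i < pred.size(); ++i) {
    real d = pred[i] - target[i];
    loss += d * d;
  }
  return loss;
}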

/**
 * This cost layer computes the smooth L1 loss for real-valued regression
 * tasks.
 * \f[
 * L =
 * \begin{cases}
 * 0.5 * x^2,  & \text{if } |x| < 1 \\
 * |x| - 0.5,  & \text{otherwise}
 * \end{cases}
 * \f]
 *
 * where x = output - label.
 */
class SmoothL1CostLayer : public CostLayer {
 public:
  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};
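
// A minimal illustrative sketch (not part of the original API): the
// elementwise smooth L1 value from the formula above, with
// x = output - label; the layer aggregates this over all elements.
inline real smoothL1Example(real x) {
  real absX = std::abs(x);
  return absX < 1 ? 0.5 * absX * absX : absX - 0.5;
}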

/**
 * A cost layer for learning to rank (LTR) tasks. This layer contains at
 * least three inputs.
 * \f[
 *  C_{i,j} = -\tilde{P_{i,j}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
 *  o_{i,j} = o_i - o_j \\
 *  \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
 * \f]
 *
 * [1] Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
 *     Rank using Gradient Descent.
 */
class RankingCost : public Layer {
 public:
  explicit RankingCost(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; }

  LayerPtr getLabelLayer() { return inputLayers_[2]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  void onPassEnd() override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) {
    (void)output;
    (void)label;
    (void)cost;
  }

  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {
    (void)outputValue;
    (void)label;
    (void)outputGrad;
  }

 private:
  double posPairCount_;
  double negPairCount_;
  MatrixPtr margin_;
  MatrixPtr marginGrad_;
  /// If the input label is given as ids (not values), it is copied to this
  /// buffer.
  MatrixPtr labelBuf_;
  LayerPtr weightLayer_;
};
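
// A minimal illustrative sketch (not part of the original API): the pairwise
// cost C_{i,j} above for one document pair, where `oi` and `oj` are model
// scores and `pTilde` is the target probability that i ranks above j.
inline real rankingCostExample(real oi, real oj, real pTilde) {
  real o = oi - oj;
  return -pTilde * o + std::log(1 + std::exp(o));  // may overflow for large o
}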

/**
 * LambdaRank is a method for learning arbitrary information retrieval
 * measures. It can be applied to any algorithm that learns through gradient
 * descent. LambdaRank is a listwise method, in that the cost depends on the
 * sorted order of the documents. LambdaRank gives the gradient of the cost
 * function:
 *
 * \f[
 * \lambda_{ij} = \frac{1}{1 + e^{o_i - o_j}} \left| \Delta_{NDCG} \right|
 * \f]
 *
 * [1] Christopher J.C. Burges, Robert Ragno, Quoc Viet Le. Learning to Rank
 *     with Nonsmooth Cost Functions.
 */
class LambdaCost : public Layer {
 public:
  explicit LambdaCost(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getScoreLayer() { return inputLayers_[1]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  real calcNDCG(const real* outputScore, const real* score, int size);
  void calcGrad(const real* outputScore,
                const real* score,
                real* gradData,
                int size);

 private:
  MatrixPtr marginGrad_;
  int truncationSize_;
  int maxSortSize_;
  std::vector<std::pair<real, int>> scorePair_;
  std::vector<std::pair<real, int>> outputScorePair_;
  std::vector<real> scoreVec_;
};
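
// A minimal illustrative sketch (not part of the original API): the
// LambdaRank gradient above for one document pair, given scores o_i, o_j
// and the absolute NDCG change obtained by swapping the two documents.
inline real lambdaGradExample(real oi, real oj, real absDeltaNDCG) {
  return 1 / (1 + std::exp(oi - oj)) * absDeltaNDCG;
}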

/**
 * Cross-entropy for multiple binary labels.
 * \f[
 * cost[i] = -sum(label[i][j]*log(output[i][j]) +
 *            (1-label[i][j])*log(1-output[i][j]))
 * \f]
 */
class MultiBinaryLabelCrossEntropy : public CostLayer {
 protected:
  MatrixPtr targetPerDim_;

 public:
  explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * A base layer for HuberRegressionLoss and HuberTwoClassification.
 */
class HuberCost : public CostLayer {
 public:
  std::vector<Argument> tmpCpuInput_;

  explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override {}
};

/**
 * Huber loss for robust regression.
 *
 * Given output f(x), label y and delta, the loss is:
 * Loss = 0.5 * (y - f)^2, if abs(y - f) <= delta \\
 * Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
 */
class HuberRegressionLoss : public HuberCost {
 public:
  explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

 protected:
  real delta_;
};
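
// A minimal illustrative sketch (not part of the original API): the scalar
// Huber regression loss from the formula above.
inline real huberRegressionExample(real f, real y, real delta) {
  real a = std::abs(y - f);
  return a <= delta ? 0.5 * a * a : delta * a - 0.5 * delta * delta;
}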

/**
 * Huber loss for robust two-class classification.
 *
 * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
 * Loss = -4 * y * f, if y * f < -1 \\
 * Loss = (1 - y * f)^2, if -1 <= y * f < 1 \\
 * Loss = 0, otherwise
 */
class HuberTwoClassification : public HuberCost {
 public:
  explicit HuberTwoClassification(const LayerConfig& config)
      : HuberCost(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};
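
// A minimal illustrative sketch (not part of the original API): the scalar
// huberized hinge loss from the formula above, with y in {-1, +1}.
inline real huberTwoClassExample(real f, real y) {
  real a = y * f;
  if (a < -1) return -4 * a;
  if (a < 1) return (1 - a) * (1 - a);
  return 0;
}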

typedef std::shared_ptr<CostLayer> CostLayerPtr;
}  // namespace paddle