/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <vector>
#include "Layer.h"

namespace paddle {

/**
 * Base class for a particular type of cost layer.
 * This type of cost should have one data layer, one label layer
 * and an optional weight layer as input.
 * The derived class should implement forwardImp() and backwardImp()
 * which calculate the cost for data and label. The weight is automatically
 * handled by the base class.
 */
class CostLayer : public Layer {
public:
  explicit CostLayer(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
35 36
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
37 38 39 40 41

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getLabelLayer() { return inputLayers_[1]; }

Y
Yu Yang 已提交
42
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
43

Y
Yu Yang 已提交
44
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
45

46 47
  virtual void forwardImp(Matrix& outputValue,
                          Argument& label,
Z
zhangjinchao01 已提交
48 49
                          Matrix& cost) = 0;

50 51
  virtual void backwardImp(Matrix& outputValue,
                           Argument& label,
Z
zhangjinchao01 已提交
52 53 54 55 56 57 58
                           Matrix& outputGrad) = 0;

protected:
  LayerPtr weightLayer_;
  real coeff_;
};

/**
 * The cross-entropy loss for multi-class classification task.
 * The loss function is:
 *
 * \f[
 * L = - \sum_{k}{t_{k} * log(P(y=k))}
 * \f]
 */
class MultiClassCrossEntropy : public CostLayer {
public:
  explicit MultiClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
72 73
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
74

Y
Yu Yang 已提交
75
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
76

Y
Yu Yang 已提交
77 78 79
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
80 81
};

/**
 * The cross-entropy with self-normalization for multi-class classification.
 *
 * The loss function is:
 * \f[
 * L = \sum_{i}[-log(P(x_{i})) + alpha * log(Z(x_{i})^2)]
 * \f]
 *
 * The \f$Z(x)\f$ is the softmax normalizer.
 *
 * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
 *     Richard Schwartz, and John Makhoul. Fast and robust neural
 *     network joint models for statistical machine translation.
 *     In Proceedings of the ACL 2014 Conference.
 */
class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
public:
  explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
102 103
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
104

Y
Yu Yang 已提交
105
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
106

Y
Yu Yang 已提交
107 108 109
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
110 111 112 113 114 115

protected:
  MatrixPtr sftMaxSum_;
  MatrixPtr sumInv_;
};

/**
 * The cross-entropy for soft binary class.
 * \f[
 * L = \sum_i (\sum_j -y_j(i)*log(x_j(i))-(1-y_j(i))*log(1-x_j(i)))
 * \f]
 */
class SoftBinaryClassCrossEntropy : public CostLayer {
public:
  explicit SoftBinaryClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
127 128
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
129

Y
Yu Yang 已提交
130
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
131

Y
Yu Yang 已提交
132 133 134
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
135 136 137 138 139

protected:
  MatrixPtr targetPerDim_;
};

/**
 * This cost layer computes the Euclidean (L2) loss for real-valued regression
 * tasks.
 * \f[
 * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
 * \f]
 */
class SumOfSquaresCostLayer : public CostLayer {
public:
  explicit SumOfSquaresCostLayer(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
152 153
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
154

Y
Yu Yang 已提交
155
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
156

Y
Yu Yang 已提交
157 158 159
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
160 161
};

/**
 * This cost layer computes the smooth L1 loss for real-valued regression
 * tasks.
 * \f[
 * L =
 *   0.5 * x^2    if / |x| < 1 /
 *   |x| - 0.5    / otherwise /
 * \f]
 *
 * x = output - label
 */
class SmoothL1CostLayer : public CostLayer {
public:
  /// Builds the layer from its configuration, forwarded to CostLayer.
  explicit SmoothL1CostLayer(const LayerConfig& config)
      : CostLayer(config) {}

  /// Initialization override; the definition lives outside this header.
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  /// Override of the CostLayer forward hook; defined outside this header.
  void forwardImp(Matrix& output,
                  Argument& label,
                  Matrix& cost) override;

  /// Override of the CostLayer backward hook; defined outside this header.
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * A cost layer for learning to rank (LTR) task. This layer contains at least
 * three inputs.
 * \f[
 *  C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
 *  o_{i,j} =  o_i - o_j  \\
 *  \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
 * \f]
 *
 * [1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
 *      Rank using Gradient Descent.
 */
class RankingCost : public Layer {
public:
  explicit RankingCost(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
203 204
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
205 206 207 208 209

  LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; }

  LayerPtr getLabelLayer() { return inputLayers_[2]; }

Y
Yu Yang 已提交
210
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
211

Y
Yu Yang 已提交
212
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
213

Y
Yu Yang 已提交
214
  void onPassEnd() override;
Z
zhangjinchao01 已提交
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) {
    (void)output;
    (void)label;
    (void)cost;
  }

  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {
    (void)outputValue;
    (void)label;
    (void)outputGrad;
  }

private:
  double posPairCount_;
  double negPairCount_;
  MatrixPtr margin_;
  MatrixPtr marginGrad_;
233
  /// if input label is put in ids (not value), copy to this buffer.
Z
zhangjinchao01 已提交
234 235 236 237
  MatrixPtr labelBuf_;
  LayerPtr weightLayer_;
};

/**
 * LambdaRank is a method for learning arbitrary information retrieval
 * measures. It can be applied to any algorithm that learns through gradient
 * descent. LambdaRank is a listwise method, in that the cost depends on the
 * sorted order of the documents. LambdaRank gives the gradient of cost
 * function:
 *
 * \f[
 * \lambda_{ij} = \frac{1}{1 + e^{o_i - o_j}} \left| \Delta_{NDCG} \right|
 * \f]
 *
 * [1] Christopher J.C. Burges, Robert Ragno, Quoc Viet Le. Learning to Rank
 *     with Nonsmooth Cost Functions.
 */
class LambdaCost : public Layer {
public:
  explicit LambdaCost(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
256 257
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
258 259 260 261 262

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getScoreLayer() { return inputLayers_[1]; }

Y
Yu Yang 已提交
263
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
264

Y
Yu Yang 已提交
265
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
266 267

  real calcNDCG(const real* outputScore, const real* score, int size);
268 269 270
  void calcGrad(const real* outputScore,
                const real* score,
                real* gradData,
Z
zhangjinchao01 已提交
271 272 273 274 275 276 277 278 279 280 281 282
                int size);

private:
  MatrixPtr marginGrad_;
  int truncationSize_;
  int maxSortSize_;
  std::vector<std::pair<real, int>> scorePair_;
  std::vector<std::pair<real, int>> outputScorePair_;
  std::vector<real> scoreVec_;
};

/**
 * Cross entropy for multi binary labels.
 * \f[
 * cost[i] = -sum(label[i][j]*log(output[i][j]) +
 *            (1-label[i][j])*log(1-output[i][j]))
 * \f]
 */
class MultiBinaryLabelCrossEntropy : public CostLayer {
protected:
  MatrixPtr targetPerDim_;

public:
  explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
297 298
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
299

Y
Yu Yang 已提交
300
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
301

Y
Yu Yang 已提交
302 303 304
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
305 306
};

/**
 * A base layer for HuberRegressionLoss and HuberTwoClassification.
 */
class HuberCost : public CostLayer {
public:
  std::vector<Argument> tmpCpuInput_;

  explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

321 322 323
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override {}
324 325
};

/**
 * Huber loss for robust regression.
 *
 * Given output f(x), label y and delta, the loss is:
 * Loss = 0.5 * (y - f)^2, if abs(y - f) <= delta \\
 * Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
 */
class HuberRegressionLoss : public HuberCost {
public:
  /// Builds the layer from its configuration, forwarded to HuberCost.
  explicit HuberRegressionLoss(const LayerConfig& config)
      : HuberCost(config) {}

  /// Initialization override; the definition lives outside this header.
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  /// Override of the forward hook; defined outside this header.
  void forwardImp(Matrix& output,
                  Argument& label,
                  Matrix& cost) override;

  /// Override of the backward hook, replacing HuberCost's empty one;
  /// defined outside this header.
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

protected:
  /// NOTE(review): presumably the Huber threshold `delta` from the loss
  /// formula -- confirm where it is set; it has no in-class initializer.
  real delta_;
};

/**
 * Huber loss for robust 2-classes classification.
 *
 * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
 * Loss = -4 * y * f, if y * f < -1 \\
 * Loss = (1 - y * f)^2, if -1 <= y * f < 1  \\
 * Loss = 0, otherwise
 */
class HuberTwoClassification : public HuberCost {
Z
zhangjinchao01 已提交
359
public:
360
  explicit HuberTwoClassification(const LayerConfig& config)
361
      : HuberCost(config) {}
Z
zhangjinchao01 已提交
362

Y
Yu Yang 已提交
363 364
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
365

Y
Yu Yang 已提交
366
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
367

Y
Yu Yang 已提交
368 369 370
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
371 372 373 374
};

/// Shared-ownership pointer to a CostLayer.
using CostLayerPtr = std::shared_ptr<CostLayer>;
}  // namespace paddle