/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <vector>
#include "Layer.h"

namespace paddle {

/**
 * Base class for a particular type of cost layer.
 * This type of cost should have one data layer, one label layer
 * and an optional weight layer as input.
 * The derived class should implement forwardImp() and backwardImp()
 * which calculate the cost for data and label. The weight is automatically
 * handled by the base class.
 */
class CostLayer : public Layer {
public:
  explicit CostLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getLabelLayer() { return inputLayers_[1]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  virtual void forwardImp(Matrix& outputValue,
                          Argument& label,
                          Matrix& cost) = 0;

  virtual void backwardImp(Matrix& outputValue,
                           Argument& label,
                           Matrix& outputGrad) = 0;

protected:
  LayerPtr weightLayer_;
  real coeff_;
};

/**
 * The cross-entropy loss for the multi-class classification task.
 * The loss function is:
 *
 * \f[
 * L = -\sum_{i}\sum_{k}{t_{k}(i) * log(P(y_i = k))}
 * \f]
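 *
 * Here t_k(i) is the one-hot target of sample i. For example, a sample whose
 * true class is predicted with probability P(y = k) = 0.25 contributes
 * -log(0.25), i.e. about 1.386, to the loss.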
 */
class MultiClassCrossEntropy : public CostLayer {
public:
  explicit MultiClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * The cross-entropy with self-normalization for multi-class classification.
 *
 * The loss function is:
 * \f[
 * L = \sum_{i}[-log(P(x_{i})) + alpha * log(Z(x_{i}))^2]
 * \f]
 *
 * The \f$Z(x)\f$ is the softmax normalizer.
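 *
 * The alpha term penalizes the squared log-normalizer, driving \f$Z(x)\f$
 * towards 1 so that the explicit softmax normalization can be skipped at
 * decode time [1].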
 *
 * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
 *     Richard Schwartz, and John Makhoul. Fast and robust neural
 *     network joint models for statistical machine translation.
 *     In Proceedings of the ACL 2014 Conference.
 */
class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
public:
  explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

protected:
  MatrixPtr sftMaxSum_;
  MatrixPtr sumInv_;
};

/**
 * The cross-entropy loss for soft binary class labels.
 * \f[
 * L = \sum_i (\sum_j -y_j(i)*log(x_j(i))-(1-y_j(i))*log(1-x_j(i)))
 * \f]
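 *
 * For example, a single dimension with soft label y_j(i) = 0.3 and
 * prediction x_j(i) = 0.4 contributes -0.3*log(0.4) - 0.7*log(0.6),
 * i.e. about 0.632, to the loss.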
 */
class SoftBinaryClassCrossEntropy : public CostLayer {
public:
  explicit SoftBinaryClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

protected:
  MatrixPtr targetPerDim_;
};

/**
 * This cost layer computes the Euclidean (L2) loss for real-valued regression
 * tasks.
 * \f[
 * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
 * \f]
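 *
 * For example, a one-dimensional sample with prediction 0.8 and target 0.5
 * contributes (0.8 - 0.5)^2 = 0.09 to the sum.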
 */
class SumOfSquaresCostLayer : public CostLayer {
public:
  explicit SumOfSquaresCostLayer(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * A cost layer for the learning-to-rank (LTR) task. This layer takes at least
 * three inputs.
 * \f[
 *  C_{i,j} = -\tilde{P_{i,j}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
 *  o_{i,j} =  o_i - o_j  \\
 *  \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
 * \f]
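 *
 * For example, if document i scores o_i = 2, document j scores o_j = 1 and
 * i should be ranked above j (\tilde{P_{i,j}} = 1), then o_{i,j} = 1 and
 * C_{i,j} = -1 + log(1 + e), i.e. about 0.313.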
 *
 * [1] Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
 *     Rank using Gradient Descent.
 */
class RankingCost : public Layer {
public:
  explicit RankingCost(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; }

  LayerPtr getLabelLayer() { return inputLayers_[2]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  void onPassEnd() override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) {
    (void)output;
    (void)label;
    (void)cost;
  }

  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {
    (void)outputValue;
    (void)label;
    (void)outputGrad;
  }

private:
  double posPairCount_;
  double negPairCount_;
  MatrixPtr margin_;
  MatrixPtr marginGrad_;
  /// If the input label is given as ids (not values), copy it to this buffer.
  MatrixPtr labelBuf_;
  LayerPtr weightLayer_;
};

/**
 * LambdaRank is a method for optimizing arbitrary information retrieval
 * measures. It can be applied to any algorithm that learns through gradient
 * descent. LambdaRank is a listwise method, in that the cost depends on the
 * sorted order of the documents. LambdaRank gives the gradient of the cost
 * function:
 *
 * \f[
 * \lambda_{ij} = \frac{1}{1 + e^{o_i - o_j}} \left| \Delta_{NDCG} \right|
 * \f]
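 *
 * Here o_i and o_j are the model scores of documents i and j, and
 * \f$\left| \Delta_{NDCG} \right|\f$ is the absolute change in NDCG obtained
 * by swapping the two documents in the current ranking.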
 *
 * [1] Christopher J.C. Burges, Robert Ragno, Quoc Viet Le. Learning to Rank
 *     with Nonsmooth Cost Functions.
 */
class LambdaCost : public Layer {
public:
  explicit LambdaCost(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getScoreLayer() { return inputLayers_[1]; }

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

  real calcNDCG(const real* outputScore, const real* score, int size);
  void calcGrad(const real* outputScore,
                const real* score,
                real* gradData,
                int size);

private:
  MatrixPtr marginGrad_;
  int truncationSize_;
  int maxSortSize_;
  std::vector<std::pair<real, int>> scorePair_;
  std::vector<std::pair<real, int>> outputScorePair_;
  std::vector<real> scoreVec_;
};

/**
 * Cross entropy for multi binary labels.
 * \f[
 * cost[i] = -sum(label[i][j]*log(output[i][j]) +
 *            (1-label[i][j])*log(1-output[i][j]))
 * \f]
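 *
 * For example, a sample with labels [1, 0] and outputs [0.9, 0.2] has
 * cost -(log(0.9) + log(1 - 0.2)), i.e. about 0.329.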
 */
class MultiBinaryLabelCrossEntropy : public CostLayer {
protected:
  MatrixPtr targetPerDim_;

public:
  explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * Huber loss for robust two-class classification.
 *
 * For label={0, 1}, let y=2*label-1. Given output f, the loss is:
 * \f[
 * Loss =
 * \left\{\begin{matrix}
 *  -4 * y * f    &   \textit{if}  \ \  y * f < -1 \\
 *  (1 - y * f)^2 &  \textit{if}   \ \  -1 < y * f < 1  \\
 *  0             &                    \textit{otherwise}
 * \end{matrix}\right.
 * \f]
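 *
 * For example, with label 1 (so y = 1): an output f = 0.3 gives y * f = 0.3,
 * so the loss is (1 - 0.3)^2 = 0.49; an output f = -2 gives y * f = -2 < -1,
 * so the loss is -4 * 1 * (-2) = 8.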
 */
class HuberTwoClass : public CostLayer {
  std::vector<Argument> tmpCpuInput_;

public:
  explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;

  void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
};

typedef std::shared_ptr<CostLayer> CostLayerPtr;
}  // namespace paddle