/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <vector>
#include "Layer.h"

namespace paddle {

/**
 * Base class for a particular type of cost layer.
 * This type of cost should have one data layer, one label layer
 * and an optional weight layer as input.
 * The derived class should implemnt forwardImp() and backwardImp()
 * which calculate the cost for data and label. The weight is automatically
 * handled by the base class.
 */
class CostLayer : public Layer {
public:
  explicit CostLayer(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
35 36
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
37 38 39 40 41

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getLabelLayer() { return inputLayers_[1]; }

Y
Yu Yang 已提交
42
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
43

Y
Yu Yang 已提交
44
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
45

46 47
  virtual void forwardImp(Matrix& outputValue,
                          Argument& label,
Z
zhangjinchao01 已提交
48 49
                          Matrix& cost) = 0;

50 51
  virtual void backwardImp(Matrix& outputValue,
                           Argument& label,
Z
zhangjinchao01 已提交
52 53 54 55 56 57 58
                           Matrix& outputGrad) = 0;

protected:
  LayerPtr weightLayer_;
  real coeff_;
};

59 60 61 62 63 64 65
/**
 * The cross-entropy loss for multi-class classification task.
 * The loss function is:
 *
 * \f[
 * L = - \sum_{i}{t_{k} * log(P(y=k))}
 * \f]
Z
zhangjinchao01 已提交
66 67 68 69 70 71
 */
class MultiClassCrossEntropy : public CostLayer {
public:
  explicit MultiClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
72 73
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
74

Y
Yu Yang 已提交
75
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
76

Y
Yu Yang 已提交
77 78 79
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
80 81
};

82 83 84 85 86 87 88 89 90 91 92 93 94 95
/**
 * The cross-entropy with self-normalization for multi-class classification.
 *
 * The loss function is:
 * \f[
 * L = \sum_{i}[-log(P(x_{i})) + alpha * log(Z(x_{i})^2)]
 * \f]
 *
 * The \f$Z(x)\f$ is the softmax normalizer.
 *
 * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
 *     Richard Schwartz, and John Makhoul. Fast and robust neural
 *     network joint models for statistical machine translation.
 *     In Proceedings of the ACL 2014 Conference.
Z
zhangjinchao01 已提交
96 97 98 99 100 101
 */
class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
public:
  explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
102 103
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
104

Y
Yu Yang 已提交
105
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
106

Y
Yu Yang 已提交
107 108 109
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
110 111 112 113 114 115

protected:
  MatrixPtr sftMaxSum_;
  MatrixPtr sumInv_;
};

116 117 118 119 120
/**
 * The cross-entropy for soft binary class.
 * \f[
 * L = \sum_i (\sum_j -y_j(i)*log(x_j(i))-(1-y_j(i))*log(1-x_j(i)))
 * \f]
Z
zhangjinchao01 已提交
121 122 123 124 125 126
 */
class SoftBinaryClassCrossEntropy : public CostLayer {
public:
  explicit SoftBinaryClassCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
127 128
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
129

Y
Yu Yang 已提交
130
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
131

Y
Yu Yang 已提交
132 133 134
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
135 136 137 138 139

protected:
  MatrixPtr targetPerDim_;
};

140 141 142 143
/**
 * This cost layer compute Euclidean (L2) loss for real-valued regression
 * tasks.
 * \f[
X
xuwei06 已提交
144
 * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
145 146
 * \f]
 */
Z
zhangjinchao01 已提交
147 148 149 150 151
class SumOfSquaresCostLayer : public CostLayer {
public:
  explicit SumOfSquaresCostLayer(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
152 153
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
154

Y
Yu Yang 已提交
155
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
156

Y
Yu Yang 已提交
157 158 159
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
160 161
};

G
gaoyuan 已提交
162 163 164 165 166
/**
 * This cost layer compute smooth L1 loss for real-valued regression
 * tasks.
 * \f[
 * L =
Y
Yuan Gao 已提交
167 168
 *   (output - label)^2 * 0.5  / -1 < (output - label) < 1 /
 *   (output - label) - 0.5    / otherwise  /
G
gaoyuan 已提交
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
 * \f]
 */
class SmoothL1CostLayer : public CostLayer {
public:
  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

185 186 187 188 189 190 191 192 193 194 195
/**
 * A cost layer for learning to rank (LTR) task. This layer contains at leat
 * three inputs.
 * \f[
 *  C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
 *  o_{i,j} =  o_i - o_j  \\
 *  \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
 * \f]
 *
 * [1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
 *      Rank useing Gradient Descent.
Z
zhangjinchao01 已提交
196 197 198 199 200
 */
class RankingCost : public Layer {
public:
  explicit RankingCost(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
201 202
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
203 204 205 206 207

  LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; }

  LayerPtr getLabelLayer() { return inputLayers_[2]; }

Y
Yu Yang 已提交
208
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
209

Y
Yu Yang 已提交
210
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
211

Y
Yu Yang 已提交
212
  void onPassEnd() override;
Z
zhangjinchao01 已提交
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) {
    (void)output;
    (void)label;
    (void)cost;
  }

  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {
    (void)outputValue;
    (void)label;
    (void)outputGrad;
  }

private:
  double posPairCount_;
  double negPairCount_;
  MatrixPtr margin_;
  MatrixPtr marginGrad_;
231
  /// if input label is put in ids (not value), copy to this buffer.
Z
zhangjinchao01 已提交
232 233 234 235
  MatrixPtr labelBuf_;
  LayerPtr weightLayer_;
};

236 237 238 239 240 241 242 243 244 245 246 247 248 249
/**
 * LambdaRank os a method for learning arbitrary information retrieval
 * measures. It can be applied to any algorithm that learns through gradient
 * descent. LambdaRank is a listwise method, in that the cost depends on the
 * sorted order of the documents. LambdaRank gives the gradient of cost
 * function:
 *
 * \f[
 * \lambda_{ij} = \frac{1}{1 + e^{o_i - o_j}} \left| \Delta_{NDCG} \right|
 * \f]
 *
 * [1] Christopher J.C. Burges, Robert Ragno, Quoc Viet Le. Learning to Rank
 *     with Nonsmooth Cost Functions.
 */
Z
zhangjinchao01 已提交
250 251 252 253
class LambdaCost : public Layer {
public:
  explicit LambdaCost(const LayerConfig& config) : Layer(config) {}

Y
Yu Yang 已提交
254 255
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
256 257 258 259 260

  LayerPtr getOutputLayer() { return inputLayers_[0]; }

  LayerPtr getScoreLayer() { return inputLayers_[1]; }

Y
Yu Yang 已提交
261
  void forward(PassType passType) override;
Z
zhangjinchao01 已提交
262

Y
Yu Yang 已提交
263
  void backward(const UpdateCallback& callback = nullptr) override;
Z
zhangjinchao01 已提交
264 265

  real calcNDCG(const real* outputScore, const real* score, int size);
266 267 268
  void calcGrad(const real* outputScore,
                const real* score,
                real* gradData,
Z
zhangjinchao01 已提交
269 270 271 272 273 274 275 276 277 278 279 280
                int size);

private:
  MatrixPtr marginGrad_;
  int truncationSize_;
  int maxSortSize_;
  std::vector<std::pair<real, int>> scorePair_;
  std::vector<std::pair<real, int>> outputScorePair_;
  std::vector<real> scoreVec_;
};

/**
281 282 283 284 285
 * Cross entropy for multi binary labels.
 * \f[
 * cost[i] = -sum(label[i][j]*log(output[i][j]) +
 *            (1-label[i][j])*log(1-output[i][j]))
 * \f]
Z
zhangjinchao01 已提交
286 287 288 289 290 291 292 293 294
 */
class MultiBinaryLabelCrossEntropy : public CostLayer {
protected:
  MatrixPtr targetPerDim_;

public:
  explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config)
      : CostLayer(config) {}

Y
Yu Yang 已提交
295 296
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
297

Y
Yu Yang 已提交
298
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
299

Y
Yu Yang 已提交
300 301 302
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
303 304
};

305 306
/**
 * Huber loss for robust 2-classes classification.
Z
zhangjinchao01 已提交
307 308
 *
 * For label={0, 1}, let y=2*label-1. Given output f, the loss is:
309 310 311 312 313 314 315 316
 * \f[
 * Loss =
 * \left\{\begin{matrix}
 *  4 * y * f     &   \textit{if}  \ \  y* f < -1 \\
 *  (1 - y * f)^2 &  \textit{if}   \ \  -1 < y * f < 1  \\
 *  0             &                    \textit{otherwise}
 * \end{matrix}\right.
 * \f]
Z
zhangjinchao01 已提交
317 318 319
 */
class HuberTwoClass : public CostLayer {
  std::vector<Argument> tmpCpuInput_;
320

Z
zhangjinchao01 已提交
321 322 323
public:
  explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}

Y
Yu Yang 已提交
324 325
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
Z
zhangjinchao01 已提交
326

Y
Yu Yang 已提交
327
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
Z
zhangjinchao01 已提交
328 329 330

  void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);

Y
Yu Yang 已提交
331 332 333
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
Z
zhangjinchao01 已提交
334 335 336 337 338 339

  void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
};

/// Shared-ownership handle to any CostLayer-derived layer.
using CostLayerPtr = std::shared_ptr<CostLayer>;
}  // namespace paddle