/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/cross_entropy_op.h"
#include <string>
#include <unordered_map>

namespace paddle {
namespace operators {

class CrossEntropyOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    auto label_dims = ctx->GetInputDim("Label");
    int rank = x_dims.size();
    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
                      "Input(X) and Input(Label) shall have the same rank.");
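    // At compile time (IsRuntime() == false) some dimensions may still be
    // unknown (e.g. -1), which makes the product of the dims non-positive;
    // skip the shape equality checks in that case and defer them to runtime.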
    bool check = true;
    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
                                framework::product(label_dims) <= 0)) {
      check = false;
    }
    if (check) {
      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
                        framework::slice_ddim(label_dims, 0, rank - 1),
                        "Input(X) and Input(Label) shall have the same shape "
                        "except the last dimension.");
    }
    if (ctx->Attrs().Get<bool>("soft_label")) {
      if (check) {
        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
                          "If Attr(soft_label) == true, the last dimension of "
                          "Input(X) and Input(Label) should be equal.");
      }
    } else {
      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
                        "If Attr(soft_label) == false, the last dimension of "
                        "Input(Label) should be 1.");
    }

    auto y_dims = x_dims;
    y_dims[rank - 1] = 1;
    ctx->SetOutputDim("Y", y_dims);
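    // Output(Y) shares the LoD (Level of Details) information with Input(X).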
    ctx->ShareLoD("X", /*->*/ "Y");
  }

 protected:
  // Explicitly set that the data type of computation kernel of cross_entropy
  // is determined by its input "X".
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
                                   ctx.device_context());
  }
};

class CrossEntropyGradientOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
                   "Input(Y@GRAD) should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
                   "Output(X@GRAD) should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    auto label_dims = ctx->GetInputDim("Label");
    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
    int rank = x_dims.size();
    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
                      "Input(Y@Grad) and Input(X) should have the same rank.");
    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
                      "Input(Label) and Input(X) should have the same rank.");

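    // Same compile-time guard as in CrossEntropyOp::InferShape: skip the
    // shape equality checks while some dimensions are still unknown.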
    bool check = true;
    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
                                framework::product(label_dims) <= 0)) {
      check = false;
    }

    if (check) {
      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
                        framework::slice_ddim(label_dims, 0, rank - 1),
                        "The Input(X) and Input(Label) should have the same "
                        "shape except the last dimension.");
      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
                        framework::slice_ddim(dy_dims, 0, rank - 1),
                        "The Input(X) and Input(Y@Grad) should have the same "
                        "shape except the last dimension.");
    }
    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
                      "The last dimension of Input(Y@Grad) should be 1.");
    if (ctx->Attrs().Get<bool>("soft_label")) {
      if (check) {
        PADDLE_ENFORCE_EQ(
            x_dims[rank - 1], label_dims[rank - 1],
            "When Attr(soft_label) == true, the last dimension of "
            "Input(X) and Input(Label) should be equal.");
      }
    } else {
      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
                        "When Attr(soft_label) == false, the last dimension of "
                        "Input(Label) should be 1.");
    }
    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
    ctx->ShareLoD("X", framework::GradVarName("X"));
  }

 protected:
  // Explicitly set that the data type of computation kernel of cross_entropy
  // is determined by its input "X".
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
                                   ctx.device_context());
  }
};

class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor, default Tensor<float>), a tensor whose last dimension "
             "size is equal to the number of classes. This input is a "
             "probability computed by the previous operator, which is almost "
             "always the result of a softmax operator.");
    AddInput(
        "Label",
        "(Tensor), the tensor which represents the ground truth. It has the "
        "same shape as 'X' except for the last dimension. When soft_label is "
        "set to false, the last dimension size is 1; when soft_label is set "
        "to true, the last dimension size is equal to the number of classes.");
    AddOutput("Y",
              "(Tensor, default Tensor<float>), a tensor whose shape is the "
              "same as 'X' except that the last dimension size is 1. It "
              "represents the cross entropy loss.");
    AddAttr<bool>("soft_label",
                  "(bool, default false), a flag indicating whether to "
                  "interpret the given labels as soft labels.")
        .SetDefault(false);
    AddAttr<int>("ignore_index",
                 "(int, default -100), Specifies a target value that is "
                 "ignored and does not contribute to the input gradient. "
                 "Only valid if soft_label is set to false.")
        .SetDefault(-100);
    AddComment(R"DOC(
CrossEntropy Operator.

The input 'X' and 'Label' will first be logically flattened to 2-D matrices.
The second dimension (row length) of each matrix equals the original last
dimension, and the first dimension (column length) is the product of all the
other original dimensions. The cross-entropy computation then takes place on
each row of the flattened matrices.
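For example, an Input(X) of shape [N, M, C] is viewed as an [N * M, C]
matrix, and the resulting Output(Y) has shape [N, M, 1].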

It supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy:
    soft_label = false, Label[i, 0] indicates the class index for sample i:

                $Y[i] = -\log(X[i, Label[i]])$
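
    For example, if X[i] = [0.2, 0.3, 0.5] and Label[i, 0] = 2, then
    Y[i] = -\log(0.5).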

2) Soft-label cross-entropy:
    soft_label = true, Label[i, j] indicates the soft label of class j
    for sample i:

                $Y[i] = -\sum_j{Label[i, j] * \log(X[i, j])}$
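
    For example, if X[i] = [0.2, 0.3, 0.5] and Label[i] = [0.1, 0.2, 0.7],
    then Y[i] = -(0.1 * \log(0.2) + 0.2 * \log(0.3) + 0.7 * \log(0.5)).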

   Please make sure that in this case the summation of each row of Label
   equals one.

3) One-hot cross-entropy with vectorized Input(Label):
     As a special case of 2), when each row of Input(Label) has only one
     non-zero element (equals 1), soft-label cross-entropy degenerates to a
     one-hot cross-entropy with one-hot label representation.

Both Input(X) and Input(Label) can optionally carry the LoD (Level of
Details) information. However, the output only shares the LoD information
with Input(X).

)DOC");
  }
};

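// Forward the data type and variable type of Input(X) to Output(Y).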
class CrossEntropyOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Y"}};
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext;

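// Register the forward and backward operators; the gradient op description
// is generated automatically by DefaultGradOpDescMaker.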
REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
                  ops::CrossEntropyOpInferVarType,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
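// CPU kernels are instantiated for float and double element types.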
REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
                       ops::CrossEntropyOpKernel<CPUCtx, double>);
REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
                       ops::CrossEntropyGradientOpKernel<CPUCtx, float>,
                       ops::CrossEntropyGradientOpKernel<CPUCtx, double>);