kldiv_loss_op.cc 7.0 KB
Newer Older
D
dengkaipeng 已提交
1 2 3 4 5 6 7 8 9 10 11
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

D
dengkaipeng 已提交
12
#include <memory>
D
dengkaipeng 已提交
13 14 15 16 17 18 19 20 21 22 23 24
#include <string>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using framework::Tensor;

// Forward operator definition for KLDivLoss: performs shape inference /
// argument validation and chooses the kernel type.
class KLDivLossOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Validates that Input(X), Input(Target) and Output(Loss) exist, that X and
  // Target agree in rank and per-dimension size, and that Attr(reduction) is
  // one of the accepted values. Output(Loss) then takes the shape of X when
  // reduction is "none" and the shape [1] otherwise.
  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "KLDivLoss");
    OP_INOUT_CHECK(ctx->HasInput("Target"), "Input", "Target", "KLDivLoss");
    OP_INOUT_CHECK(ctx->HasOutput("Loss"), "Output", "Loss", "KLDivLoss");

    const auto dim_x = ctx->GetInputDim("X");
    const auto dim_target = ctx->GetInputDim("Target");

    // Ranks must match before dimensions can be compared one by one.
    PADDLE_ENFORCE_EQ(dim_x.size(), dim_target.size(),
                      platform::errors::InvalidArgument(
                          "Input(X) rank and Input(Target) rank should be "
                          "same, but received X rank(%d) != Target rank(%d)",
                          dim_x.size(), dim_target.size()));

    for (int i = 0; i < dim_x.size(); ++i) {
      // When not at runtime, a dimension is only compared if both sizes are
      // positive (presumably non-positive marks a size that is not yet known
      // -- confirm against the framework's compile-time shape convention).
      if (!ctx->IsRuntime() && (dim_x[i] <= 0 || dim_target[i] <= 0)) {
        continue;
      }
      PADDLE_ENFORCE_EQ(
          dim_x[i], dim_target[i],
          platform::errors::InvalidArgument(
              "Input(X) and Input(Target) should in same shape. but received "
              "X dimension[%d](%d) != Target dimension[%d](%d)",
              i, dim_x[i], i, dim_target[i]));
    }

    const std::string reduction = ctx->Attrs().Get<std::string>("reduction");

    const bool reduction_valid = reduction == "mean" || reduction == "sum" ||
                                 reduction == "batchmean" ||
                                 reduction == "none";
    PADDLE_ENFORCE_EQ(
        reduction_valid, true,
        platform::errors::InvalidArgument(
            "Attr(reduction) can only be 'none'|'batchmean'|'sum'|'mean'."));

    if (reduction != "none") {
      // Every reducing mode yields a single-element output.
      ctx->SetOutputDim("Loss", {1});
    } else {
      ctx->SetOutputDim("Loss", dim_x);
    }
  }

 protected:
  // The kernel is keyed on the data type of Input(X) and the current place.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto x_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
    return framework::OpKernelType(x_data_type, ctx.GetPlace());
  }
};

// Declares the inputs, output, attribute and user-facing documentation of the
// KLDivLoss operator.
class KLDivLossOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Registers, in order: Input(X), Input(Target), Output(Loss), the string
  // attribute "reduction" (default "mean"), and the operator comment.
  void Make() override {
    AddInput("X",
             "The input tensor of KL divergence loss operator. "
             "This is a tensor with shape of [N, *], where N is the "
             "batch size, * means any number of additional dimensions. "
             "The data type is float32 or float64");
    AddInput("Target",
             "The target tensor of KL divergence loss operator. "
             "This is a tensor with shape of Input(X). "
             "The data type is same as Input(X)");
    AddOutput(
        "Loss",
        "The output KL divergence loss tensor. if Attr(reduction) is "
        "'none', this tensor should be in same shape of Input(X), else "
        "this tensor should be in shape of [1].");

    AddAttr<std::string>(
        "reduction",
        "The reduction type to apply to the output, available types "
        "are 'none' | 'batchmean' | 'mean' | 'sum', 'none' for no "
        "reduction, 'batchmean' for the sum of output divided by "
        "batch size, 'mean' for the average value of all output, "
        "'sum' for the sum of the output.")
        .SetDefault("mean");

    AddComment(R"DOC(
         This operator calculates the Kullback-Leibler divergence loss
         between Input(X) and Input(Target). Note that Input(X) is the
         log-probability and Input(Target) is the probability.

         KL divergence loss is calculated as follows:

         $$l(x, y) = y * (\log(y) - x)$$

         While :math:`x` is Input(X) and :math:`y` is Input(Target).

         While :attr:`reduction` is :attr:`none`, output loss is in
         the same shape as Input(X), loss in each point is calculated 
         separately and no reduction is applied.
         
         While :attr:`reduction` is :attr:`mean`, output loss is in
         shape of [1] and loss value is the mean value of all losses.
         
         While :attr:`reduction` is :attr:`sum`, output loss is in
         shape of [1] and loss value is the sum value of all losses.
         
         While :attr:`reduction` is :attr:`batchmean`, output loss is 
         in shape of [1] and loss value is the sum value of all losses
         divided by batch size.
         
         )DOC");
  }
};

// Gradient operator definition for KLDivLoss: infers the shape of the
// gradient of X and chooses the kernel type.
class KLDivLossOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Requires Input(X), Input(Target) and the gradient of Loss to be present.
  // If the gradient of X is requested as an output, it is given the shape of
  // X.
  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "KLDivLossGrad");
    OP_INOUT_CHECK(ctx->HasInput("Target"), "Input", "Target", "KLDivLossGrad");
    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Loss")), "Input",
                   "Loss@GRAD", "KLDivLossGrad");

    const auto x_dims = ctx->GetInputDim("X");
    const auto x_grad_name = framework::GradVarName("X");
    if (!ctx->HasOutput(x_grad_name)) {
      return;
    }
    ctx->SetOutputDim(x_grad_name, x_dims);
  }

 protected:
  // The kernel is keyed on the data type of the gradient of Loss and the
  // current place.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto loss_grad_type = OperatorWithKernel::IndicateVarDataType(
        ctx, framework::GradVarName("Loss"));
    return framework::OpKernelType(loss_grad_type, ctx.GetPlace());
  }
};

H
hong 已提交
150 151
// Builds the description of the "kldiv_loss_grad" op from the forward op.
// T is the op representation type the maker is instantiated for.
template <typename T>
class KLDivLossOpGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Wires the grad op: forwards X and Target, feeds the gradient of Loss as
  // input, copies all forward attributes, and emits the gradient of X.
  void Apply(GradOpPtr<T> op) const override {
    const auto loss_grad_name = framework::GradVarName("Loss");
    const auto x_grad_name = framework::GradVarName("X");

    op->SetType("kldiv_loss_grad");

    // Inputs.
    op->SetInput("X", this->Input("X"));
    op->SetInput("Target", this->Input("Target"));
    op->SetInput(loss_grad_name, this->OutputGrad("Loss"));

    // Attributes are taken over unchanged from the forward op.
    op->SetAttrMap(this->Attrs());

    // Outputs.
    op->SetOutput(x_grad_name, this->InputGrad("X"));
  }
};

168
// Declares KLDivLossGradNoNeedBufferVarInferer, which names "X" as a
// no-need-buffer variable; it is attached to kldiv_loss_grad at registration.
// NOTE(review): presumably this means the grad kernel uses only X's metadata
// (e.g. its shape) and never reads X's data -- confirm against the grad kernel.
DECLARE_NO_NEED_BUFFER_VARS_INFERER(KLDivLossGradNoNeedBufferVarInferer, "X");
169

D
dengkaipeng 已提交
170 171 172 173 174
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Forward op: operator class, proto maker, and the grad-op maker instantiated
// for both framework::OpDesc and imperative::OpBase.
REGISTER_OPERATOR(
    kldiv_loss,
    ops::KLDivLossOp,
    ops::KLDivLossOpMaker,
    ops::KLDivLossOpGradMaker<paddle::framework::OpDesc>,
    ops::KLDivLossOpGradMaker<paddle::imperative::OpBase>);

// Backward op: operator class plus its no-need-buffer variable inferer.
REGISTER_OPERATOR(
    kldiv_loss_grad,
    ops::KLDivLossOpGrad,
    ops::KLDivLossGradNoNeedBufferVarInferer);