/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h"

#include <string>

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#endif

namespace paddle {
namespace operators {

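// Forward op definition: validates Input(X) and Output(Out), propagates
// shape and LoD from X to Out, and selects the kernel type.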
class SequenceSoftmaxOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceSoftmax");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceSoftmax");

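    // Out shares both the dims and the LoD of X.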
    ctx->ShareDim("X", /*->*/ "Out");
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
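    // The kernel data type follows input X; the layout comes from the
    // optional data_format attribute when present, otherwise kAnyLayout.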
    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
    phi::DataLayout layout_ = DataLayout::kAnyLayout;
    if (ctx.HasAttr("data_format")) {
      layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
    }
    return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
  }
};

class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(LoDTensor) 1-D or 2-D input LoDTensor with the 2-nd dimension "
             "of length 1.");
    AddOutput("Out",
              "(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension "
              "of length 1.");
    AddAttr<bool>(
        "use_cudnn",
        "(bool, default false) Only used by the cuDNN kernel; requires cuDNN "
        "to be installed.")
        .SetDefault(false)
        .AsExtra();
    AddComment(R"DOC(
Sequence Softmax Operator.

SequenceSoftmaxOp computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1, so the shape of the
input Tensor is either [N, 1] or [N], where N is the sum of the lengths of
all sequences.

The algorithm works as follows:

    for the i-th sequence in a mini-batch:

$$
Out(X[lod[i]:lod[i+1], :]) = \
\frac{\exp(X[lod[i]:lod[i+1], :])} \
{\sum(\exp(X[lod[i]:lod[i+1], :]))}
$$

For example, for a mini-batch of 3 sequences of variable lengths, containing
2, 3, and 2 time-steps respectively, the lod is [0, 2, 5, 7]. Softmax is then
computed over X[0:2, :], X[2:5, :], and X[5:7, :], and N turns out to be 7.

)DOC");
  }
};

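// Gradient op definition: validates Out, Out@GRAD, X and X@GRAD, and infers
// the shape of X@GRAD.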
class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("Out"), "Input", "Out", "SequenceSoftmaxGrad");
    OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")),
                   "Input",
                   "Out@GRAD",
                   "SequenceSoftmaxGrad");
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceSoftmaxGrad");
    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
                   "Output",
                   "X@GRAD",
                   "SequenceSoftmaxGrad");

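    // Out and Out@GRAD must have the same shape; X@GRAD takes the shape of X.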
    auto out_dim = ctx->GetInputDim("Out");
    auto out_grad_dim = ctx->GetInputDim(framework::GradVarName("Out"));
    PADDLE_ENFORCE_EQ(
        out_dim,
        out_grad_dim,
        platform::errors::InvalidArgument(
            "The shape of Input(Out) and Input(Out@GRAD) of "
            "SequenceSoftmaxGrad operator do not match. The Input(Out)'s shape "
            "is [%s], the Input(Out@GRAD)'s shape is [%s].",
            out_dim,
            out_grad_dim));

    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Out");
    phi::DataLayout layout_ = DataLayout::kAnyLayout;
    if (ctx.HasAttr("data_format")) {
      layout_ = phi::StringToDataLayout(ctx.Attr<std::string>("data_format"));
    }
    return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_);
  }
};

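// The backward pass does not read the contents of X (only its shape/LoD), so
// X's data buffer does not need to be retained for the grad op.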
DECLARE_NO_NEED_BUFFER_VARS_INFERER(
    SequenceSoftmaxGradOpNoNeedBufferVarsInferer, "X");

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
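// Register the forward op together with default grad-op makers for the static
// graph (OpDesc) and dygraph (OpBase) modes, then the grad op with its
// no-need-buffer inferer.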
REGISTER_OPERATOR(
    sequence_softmax,
    ops::SequenceSoftmaxOp,
    ops::SequenceSoftmaxOpMaker,
    paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
    paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(sequence_softmax_grad,
                  ops::SequenceSoftmaxGradOp,
                  ops::SequenceSoftmaxGradOpNoNeedBufferVarsInferer);
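// CPU kernels registered for float and double element types.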
REGISTER_OP_CPU_KERNEL(sequence_softmax,
                       ops::SequenceSoftmaxKernel<phi::CPUContext, float>,
                       ops::SequenceSoftmaxKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(sequence_softmax_grad,
                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, float>,
                       ops::SequenceSoftmaxGradKernel<phi::CPUContext, double>);