sequence_erase_op.cc 3.2 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/sequence_erase_op.h"
16
#include <vector>
Y
Yibing Liu 已提交
17 18 19 20 21 22 23 24 25 26

namespace paddle {
namespace operators {

class SequenceEraseOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
C
chenweihang 已提交
27
                   "Input(X) of SequenceErase operator should not be null.");
Y
Yibing Liu 已提交
28
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
C
chenweihang 已提交
29
                   "Output(Out) of SequenceErase operator should not be null.");
Y
Yibing Liu 已提交
30 31 32 33 34
    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1,
                   "Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
                   "with the 2nd dimension equal to 1.");
    ctx->SetOutputDim("Out", x_dims);
Y
Yibing Liu 已提交
35 36 37 38 39
  }
};

class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
Y
Yu Yang 已提交
40
  void Make() override {
Y
Yibing Liu 已提交
41
    AddInput("X",
Y
Yibing Liu 已提交
42 43
             "(2-D LoDTensor with the 2nd dim. equal to 1) "
             "Input LoDTensor of SequenceEraseOp.");
Y
Yibing Liu 已提交
44
    AddOutput("Out",
Y
Yibing Liu 已提交
45 46
              "(2-D LoDTensor with the 2nd dim. equal to 1) "
              "Output LoDTensor of SequenceEraseOp.");
Y
Yibing Liu 已提交
47
    AddAttr<std::vector<int>>("tokens",
Y
Yibing Liu 已提交
48 49
                              "(vector<int>) Tokens need to be erased from "
                              "input sequences.");
Y
Yibing Liu 已提交
50 51 52
    AddComment(R"DOC(
Sequence Erase Operator.

53
Sequence erase operator erases tokens specified by Attr(tokens) from the input 
Y
Yibing Liu 已提交
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
sequences Input(X), and outputs the remaining data and modifies the LoD 
information at the same time. For example, given a 2-D LoDTensor

    X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T

with lod = [[0, 3, 6, 10]], there are three sequences in the input:
   
     X1 = [[2, 2, 6]]^T, X2 = [[1, 3, 9]]^T and X3 = [[6, 1, 0, 1]]^T.

If the tokens to be erased are Attr(tokens) = [2, 3, 5], after the erasing 
operation, the three sequences become

    X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.

Hence the LoDTensor Output(Out) should be

    Out = [[6, 1, 9, 6, 1, 0, 1]]^T,

with lod = [[0, 1, 3, 7]].

An example usage for this operator is to remove the special tokens when 
computing the edit distance between two strings, such as blank, start token, 
and end token.
Y
Yibing Liu 已提交
77 78 79 80 81 82 83 84 85 86 87 88
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(sequence_erase, ops::SequenceEraseOp,
                             ops::SequenceEraseOpMaker);
REGISTER_OP_CPU_KERNEL(
    sequence_erase,
89 90
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>,
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int64_t>);