sequence_erase_op.cc 3.3 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Y
Yibing Liu 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/sequence_erase_op.h"
16
#include <vector>
Y
Yibing Liu 已提交
17 18 19 20 21 22 23 24 25 26 27 28 29

namespace paddle {
namespace operators {

class SequenceEraseOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceEraseOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequenceEraseOp should not be null.");
Y
Yibing Liu 已提交
30 31 32 33 34
    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1,
                   "Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
                   "with the 2nd dimension equal to 1.");
    ctx->SetOutputDim("Out", x_dims);
Y
Yibing Liu 已提交
35 36 37 38 39 40 41 42
  }
};

class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SequenceEraseOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
Y
Yibing Liu 已提交
43 44
             "(2-D LoDTensor with the 2nd dim. equal to 1) "
             "Input LoDTensor of SequenceEraseOp.");
Y
Yibing Liu 已提交
45
    AddOutput("Out",
Y
Yibing Liu 已提交
46 47
              "(2-D LoDTensor with the 2nd dim. equal to 1) "
              "Output LoDTensor of SequenceEraseOp.");
Y
Yibing Liu 已提交
48
    AddAttr<std::vector<int>>("tokens",
Y
Yibing Liu 已提交
49 50
                              "(vector<int>) Tokens need to be erased from "
                              "input sequences.");
Y
Yibing Liu 已提交
51 52 53
    AddComment(R"DOC(
Sequence Erase Operator.

54
Sequence erase operator erases tokens specified by Attr(tokens) from the input 
Y
Yibing Liu 已提交
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
sequences Input(X), and outputs the remaining data and modifies the LoD 
information at the same time. For example, given a 2-D LoDTensor

    X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T

with lod = [[0, 3, 6, 10]], there are three sequences in the input:
   
     X1 = [[2, 2, 6]]^T, X2 = [[1, 3, 9]]^T and X3 = [[6, 1, 0, 1]]^T.

If the tokens to be erased are Attr(tokens) = [2, 3, 5], after the erasing 
operation, the three sequences become

    X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.

Hence the LoDTensor Output(Out) should be

    Out = [[6, 1, 9, 6, 1, 0, 1]]^T,

with lod = [[0, 1, 3, 7]].

An example usage for this operator is to remove the special tokens when 
computing the edit distance between two strings, such as blank, start token, 
and end token.
Y
Yibing Liu 已提交
78 79 80 81 82 83 84 85 86 87 88 89
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(sequence_erase, ops::SequenceEraseOp,
                             ops::SequenceEraseOpMaker);
REGISTER_OP_CPU_KERNEL(
    sequence_erase,
90 91
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>,
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int64_t>);