/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/sequence_erase_op.h"

namespace paddle {
namespace operators {

// Operator class for sequence_erase. It only provides shape inference; the
// constructors are inherited from framework::OperatorWithKernel.
class SequenceEraseOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Validates the operator's slots and sets the dimensions of Output(Out).
  //
  // Enforces that:
  //   * Input(X) and Output(Out) are both present;
  //   * Input(X) is 2-D and its second dimension equals 1.
  //
  // Output(Out) is then given the same dimensions as Input(X).
  // NOTE(review): erasing tokens can leave fewer rows than X has, so the
  // kernel presumably resizes Out at run time -- confirm against
  // sequence_erase_op.h.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceEraseOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequenceEraseOp should not be null.");
    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1,
                   "Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
                   "with the 2nd dimension equal to 1.");
    ctx->SetOutputDim("Out", x_dims);
  }
};

// Declares the proto of the sequence_erase operator: one input "X", one
// output "Out", the integer-vector attribute "tokens", and the user-facing
// operator comment.
class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Forwards `proto` and `op_checker` to the base maker, then registers the
  // operator's input, output, attribute and documentation string.
  SequenceEraseOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(2-D LoDTensor with the 2nd dim. equal to 1) "
             "Input LoDTensor of SequenceEraseOp.");
    AddOutput("Out",
              "(2-D LoDTensor with the 2nd dim. equal to 1) "
              "Output LoDTensor of SequenceEraseOp.");
    AddAttr<std::vector<int>>("tokens",
                              "(vector<int>) Tokens need to be erased from "
                              "input sequences.");
    // The raw string below is emitted verbatim as the operator's
    // documentation, so its text (including whitespace) is runtime data.
    AddComment(R"DOC(
Sequence Erase Operator.

Sequence erase operator erases tokens specified by Attr(tokens) from the input 
sequences Input(X), and outputs the remaining data and modifies the LoD 
information at the same time. For example, given a 2-D LoDTensor

    X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T

with lod = [[0, 3, 6, 10]], there are three sequences in the input:
   
     X1 = [[2, 2, 6]]^T, X2 = [[1, 3, 9]]^T and X3 = [[6, 1, 0, 1]]^T.

If the tokens to be erased are Attr(tokens) = [2, 3, 5], after the erasing 
operation, the three sequences become

    X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.

Hence the LoDTensor Output(Out) should be

    Out = [[6, 1, 9, 6, 1, 0, 1]]^T,

with lod = [[0, 1, 3, 7]].

An example usage for this operator is to remove the special tokens when 
computing the edit distance between two strings, such as blank, start token, 
and end token.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Register the sequence_erase operator with its proto maker; the
// WITHOUT_GRADIENT form is used, so no gradient operator is registered here.
REGISTER_OP_WITHOUT_GRADIENT(sequence_erase, ops::SequenceEraseOp,
                             ops::SequenceEraseOpMaker);

// Register the CPU kernels, instantiated for int32_t and int64_t elements.
REGISTER_OP_CPU_KERNEL(
    sequence_erase,
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>,
    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int64_t>);