diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc
index 02961d00ec750e4e48dbf4a217e10e9c252f409a..d385e47b6c69500b141848067e4be58dd7caf102 100644
--- a/paddle/operators/sequence_concat_op.cc
+++ b/paddle/operators/sequence_concat_op.cc
@@ -23,18 +23,19 @@ class SequenceConcatOp : public framework::OperatorWithKernel {
 
  protected:
  void InferShape(framework::InferShapeContextBase* ctx) const override {
-    PADDLE_ENFORCE_GT(ctx->Inputs("X").size(), 0UL,
-                      "Inputs(X) of SequenceConcatOp should not be empty.");
+    PADDLE_ENFORCE(ctx->HasInputs("X"),
+                   "Inputs(X) of SequenceConcatOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SequenceConcatOp should not be null.");
     const size_t level = static_cast<size_t>(ctx->Attrs().Get<int>("level"));
     const size_t axis = static_cast<size_t>(ctx->Attrs().Get<int>("axis"));
     PADDLE_ENFORCE(level == 0UL || level == 1UL,
-                   "Sequence Concat Op only support one or two sequence now.");
+                   "The sequence_concat operator only accepts a sequence "
+                   "or a nested sequence as its input.");
     auto ins_dims = ctx->GetInputsDim("X");
     framework::DDim out_dims = ins_dims[0];
     const size_t n = ins_dims.size();
-    for (size_t i = 1; i < n; i++) {
+    for (size_t i = 1; i < n; ++i) {
       out_dims[axis] += ins_dims[i][axis];
     }
     ctx->SetOutputDim("Out", out_dims);
@@ -47,33 +48,40 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
                          framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
-             "Multip LodTensors, the variable-length inputs of "
-             "SequenceConcatOp")
+             "The input multiple LoDTensors, which are variable-length "
+             "sequences or nested sequences.")
         .AsDuplicable();
     AddOutput("Out",
-              "A float LodTensor, the variable-length output of "
-              "SequenceConcatOp.");
+              "A LoDTensor, the variable-length output of "
+              "sequence_concat Op.");
     AddAttr<int>("axis",
+                 "(int, default 0)"
                  "The axis which the inputs will be joined with."
-                 "If axis is 0, the inputs will be joined with Lod index.")
+                 "If axis is 0, the inputs will be joined with LoD index.")
         .SetDefault(0);
     AddAttr<int>("level",
+                 "(int, default 0)"
                  "The level which the inputs will be joined with."
-                 "If level is 0, the inputs will be joined with word."
-                 "If level is 1, the inputs will be joined with sentence.")
+                 "If level is 0, the inputs will be joined with "
+                 "nested sequences."
+                 "If level is 1, the inputs will be joined with sequences.")
         .SetDefault(0);
     AddComment(R"DOC(
-    SequenceConcatOp concat multip LodTensors and only supports one or two levels.
+    The sequence_concat operator concatenates multiple LoDTensors.
+    It only supports sequences (LoD Tensor with level=1)
+    or nested sequences (LoD tensor with level=0) as its inputs.
    - Case1:
-      axis is 1, level is 1, the Lod of Inputs are the same,
+      If the axis is 1, level is 1, the LoD of Inputs are the same,
        LoD(x0) = {{0,2,4},{0,1,2,3,4}}; Dims(x0) = (2,3,4)
        LoD(x1) = {{0,2,4},{0,1,2,3,4}}; Dims(x1) = (2,4,4)
-        LoD(Out) = {{0,2,4},{01,2,3,4}}; Dims(Out) = (2,7,4)
+        LoD(Out) = {{0,2,4},{0,1,2,3,4}}; Dims(Out) = (2,7,4)
    - Case2:
-      If axis is 0, level is 1, the Lod of inputs are different,
+      If the axis is 0, level is 1, the LoD of inputs are different,
        LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (2,3,4)
        LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (3,3,4)
        LoD(Out) = {{0,5,9}, {0,1,2,4,5,6,7,8,9}}; Dims(Out) = (5,3,4)
+
+    NOTE: The level of all the inputs should be the same.
)DOC"); } }; @@ -85,9 +93,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override { PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Gradient of Out should not be null."); - PADDLE_ENFORCE_GT(ctx->Outputs(framework::GradVarName("X")).size(), 0UL, - "Gradient of X should not be empty.") + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be empty."); ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); } }; diff --git a/paddle/operators/sequence_concat_op.h b/paddle/operators/sequence_concat_op.h index 79e372a797b2a59ddc8b2f443443711ab7a09e39..7f9c91b3c8d0589397ae4a7d2f08fb89b6cfdcfe 100644 --- a/paddle/operators/sequence_concat_op.h +++ b/paddle/operators/sequence_concat_op.h @@ -23,7 +23,7 @@ using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; -// Concat Lod, the initialized Lod of Output is lod(x0), +// Concat LoD, the initialized LoD of Output is lod(x0), // if axis is not 0, the LoD(Out) will be the same as Inputs, if axis is 0: // Case1: // There is one level, the Output LoD will be modified: @@ -37,26 +37,26 @@ using LoD = framework::LoD; // LoD(x1) = {{0,3,5}, {0,1,3,4,5}} // LoD(Out) = {{0,5,9}, {0,1,2,4,5,6,7,8,9}} template -LoD concatLod(const std::vector ins, const size_t axis, +LoD concatLoD(const std::vector ins, const size_t axis, const size_t level) { auto out_lod = ins[0]->lod(); const size_t n = ins.size(); if (axis == 0UL) { if (level == 0) { - for (size_t i = 1; i < n; i++) { - for (size_t j = 0; j < ins[i]->lod()[0].size(); j++) { + for (size_t i = 1; i < n; ++i) { + for (size_t j = 0; j < ins[i]->lod()[0].size(); ++j) { out_lod[0][j] += ins[i]->lod()[0][j]; } } } else if (level == 1) { - for (size_t i = 1; i < n; i++) { - PADDLE_ENFORCE_EQ(ins[i]->NumLevels(), 2UL, - "All the LoDTensors of Inputs(X) should " - "have two level."); - for (size_t j = 0; j < ins[i]->lod()[0].size(); j++) { + PADDLE_ENFORCE_EQ(ins[0]->NumLevels(), 2UL, + "If the level is 1, all of the inputs " + "should be the the nested sequence."); + for (size_t i = 1; i < n; ++i) { + for (size_t j = 0; j < ins[i]->lod()[0].size(); ++j) { out_lod[0].push_back(ins[i]->lod()[0][j]); } - for (size_t j = 0; j < ins[i]->lod()[1].size(); j++) { + for (size_t j = 0; j < ins[i]->lod()[1].size(); ++j) { out_lod[1][j] += ins[i]->lod()[1][j]; } } @@ -66,7 +66,7 @@ LoD concatLod(const std::vector ins, const size_t axis, } template -class SequenceConcatOpKernel : public framework::OpKernel { +class SequenceConcatOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto ins = ctx.MultiInput("X"); @@ -74,18 +74,37 @@ class SequenceConcatOpKernel : public framework::OpKernel { const size_t axis = static_cast(ctx.Attr("axis")); const size_t level = static_cast(ctx.Attr("level")); const size_t n = ins.size(); + + for (size_t i = 1; i < n; ++i) { + PADDLE_ENFORCE_EQ(ins[0]->NumLevels(), ins[i]->NumLevels(), + "The level number of all the input LoDTensors " + "should be the same."); + PADDLE_ENFORCE_EQ(ins[0]->dims().size(), ins[i]->dims().size(), + "The dimensions size of all the input LoDTensors " + "should be the same."); + + const size_t dims_size = ins[i]->dims().size(); + for (size_t j = 0; j < dims_size; ++j) { + if (j == axis) continue; + PADDLE_ENFORCE_EQ(ins[0]->dims()[j], 
+        PADDLE_ENFORCE_EQ(ins[0]->dims()[j], ins[i]->dims()[j],
+                          "The dimensions of all the input LoDTensors "
+                          "except for the specified axis should be "
+                          "matched exactly.");
+      }
+    }
+
    out->mutable_data<T>(ctx.GetPlace());
-    auto out_lod = concatLod(ins, axis, level);
+    auto out_lod = concatLoD(ins, axis, level);
    out->set_lod(out_lod);
    auto out_lod_level = out_lod[level];
-    for (size_t i = 0; i < out_lod_level.size() - 1; i++) {
+    for (size_t i = 0; i < out_lod_level.size() - 1; ++i) {
      Tensor out_t = out->Slice(static_cast<int>(out_lod_level[i]),
                                static_cast<int>(out_lod_level[i + 1]));
      auto out_stride = framework::stride(out_t.dims());
      size_t offset = 0;
-      for (size_t j = 0; j < n; j++) {
+      for (size_t j = 0; j < n; ++j) {
        auto in_lod_level = ins[j]->lod()[level];
        auto in_stride = framework::stride(ins[j]->dims());
        Tensor in_t = ins[j]->Slice(static_cast<int>(in_lod_level[i]),
@@ -100,7 +119,7 @@ class SequenceConcatOpKernel : public framework::OpKernel {
 };
 
 template <typename Place, typename T>
-class SequenceConcatGradOpKernel : public framework::OpKernel {
+class SequenceConcatGradOpKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto ins = ctx.MultiInput<LoDTensor>("X");
@@ -118,17 +137,17 @@ class SequenceConcatGradOpKernel : public framework::OpKernel {
      x_grads[i]->mutable_data<T>(ctx.GetPlace());
    }
 
-    auto out_lod = concatLod(ins, axis, level);
+    auto out_lod = concatLoD(ins, axis, level);
    auto out_lod_level = out_lod[level];
 
-    for (size_t i = 0; i < out_lod_level.size() - 1; i++) {
+    for (size_t i = 0; i < out_lod_level.size() - 1; ++i) {
      Tensor out_grad_t = out_grad->Slice(static_cast<int>(out_lod_level[i]),
                                          static_cast<int>(out_lod_level[i + 1]));
      auto out_grad_stride = framework::stride(out_grad_t.dims());
      size_t offset = 0;
-      for (size_t j = 0; j < n; j++) {
+      for (size_t j = 0; j < n; ++j) {
        auto x_grad_lod_level = x_grads[j]->lod()[level];
        auto x_grad_stride = framework::stride(x_grads[j]->dims());
        Tensor x_grad_t =
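
For reference, here is a minimal standalone sketch (not part of the patch) of the shape rule that InferShape applies in the first hunk: the output dims start as a copy of ins_dims[0] and only the concat axis is accumulated, so Dims(x0) = (2,3,4) and Dims(x1) = (2,4,4) joined along axis 1 give Dims(Out) = (2,7,4), matching Case1 in the DOC comment. The std::vector stand-in for framework::DDim is an assumption made purely to keep the example self-contained.

// Illustration only: mirrors the axis-accumulation loop from InferShape,
// with plain std::vector used in place of framework::DDim.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Dims(x0) = (2,3,4), Dims(x1) = (2,4,4), concatenated along axis 1.
  std::vector<std::vector<int64_t>> ins_dims = {{2, 3, 4}, {2, 4, 4}};
  const std::size_t axis = 1;

  // out_dims is initialized from ins_dims[0]; only the concat axis grows.
  std::vector<int64_t> out_dims = ins_dims[0];
  for (std::size_t i = 1; i < ins_dims.size(); ++i) {
    out_dims[axis] += ins_dims[i][axis];
  }

  for (int64_t d : out_dims) std::cout << d << ' ';  // prints: 2 7 4
  std::cout << '\n';
  return 0;
}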