diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..287fb1942e4a2b17f6d51c9a6b7f6fb71fbaa601
--- /dev/null
+++ b/paddle/operators/sequence_concat_op.cc
@@ -0,0 +1,129 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sequence_concat_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SequenceConcatOp : public framework::OperatorWithKernel {  // Forward op: validates X/Out and infers Out's shape.
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {  // Out dims = dims of X[0] with dim[axis] summed over all inputs.
+    PADDLE_ENFORCE(ctx->HasInputs("X"),
+                   "Inputs(X) of SequenceConcatOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SequenceConcatOp should not be null.");
+    const size_t level = static_cast(ctx->Attrs().Get("level"));  // stored as size_t, so a negative attribute would wrap around
+    const size_t axis = static_cast(ctx->Attrs().Get("axis"));  // NOTE(review): axis is not range-checked against the rank here
+    PADDLE_ENFORCE(level == 0UL || level == 1UL,  // only LoD levels 0 and 1 are accepted
+                   "The sequence_concat operator only accepts sequence "
+                   "or a nested sequence as its input.");
+    auto ins_dims = ctx->GetInputsDim("X");
+    framework::DDim out_dims = ins_dims[0];  // start from the first input's dims
+    const size_t n = ins_dims.size();
+    for (size_t i = 1; i < n; ++i) {
+      out_dims[axis] += ins_dims[i][axis];  // add each remaining input's extent along `axis`
+    }
+    ctx->SetOutputDim("Out", out_dims);
+  }
+};
+
+class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {  // Declares the op's inputs, output, attributes and documentation.
+ public:
+
SequenceConcatOpMaker(framework::OpProto* proto,  // registers X, Out, the two attributes and the op comment
+                        framework::OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(A vector of LoDTensor), the input is a vector of LoDTensor, "
+             "each of which is a variable-length sequence or nested sequence.")
+        .AsDuplicable();  // X is declared duplicable; its description says it carries a vector of LoDTensor
+    AddOutput("Out",
+              "(A LoDTensor), the variable-length output of "
+              "sequence_concat Op.");
+    AddAttr("axis",
+            "(int, default 0) "  // trailing space: adjacent literals are concatenated verbatim
+            "The axis which the inputs will be joined with. "
+            "If axis is 0, the inputs will be joined with LoD index.")
+        .SetDefault(0);  // axis defaults to 0
+    AddAttr("level",
+            "(int, default 0) "  // trailing space: adjacent literals are concatenated verbatim
+            "The level at which the inputs will be joined. "
+            "If the level is 0, the inputs will be joined at the nested "
+            "sequence level. "
+            "If the level is 1, the inputs will be joined at the "
+            "sequence level. "
+            "The level should be less than the level number of inputs.")
+        .SetDefault(0);  // level defaults to 0
+    AddComment(R"DOC(
+    The sequence_concat operator concatenates multiple LoDTensors.
+    It only supports sequence (LoD Tensor with level number is 1)
+    or a nested sequence (LoD tensor with level number is 2) as its input.
+    - Case1:
+      If the axis is other than 0(here, axis is 1 and level is 1),
+      each input should have the same LoD information and the LoD
+      information of the output keeps the same as the input.
+
+      LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
+      LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4)
+      LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)
+
+    - Case2:
+      If the axis is 0(here, level is 0), the inputs are concatenated along
+      time steps, the LoD information of the output need to re-compute.
+
+      LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
+      LoD(x1) = {{0,3,5}, {0,1,2,3,5}}; Dims(x1) = (5,3,4)
+      LoD(Out) = {{0,5,9}, {0,1,2,3,4,5,6,7,9}}; Dims(Out) = (9,3,4)
+
+    - Case3:
+      If the axis is 0(here, level is 1).
+
+      LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
+      LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
+      LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)
+
+    NOTE: The levels of all the inputs should be the same.
+    )DOC");
+  }
+};
+
+class SequenceConcatGradOp : public framework::OperatorWithKernel {  // Gradient op: only shape inference is defined here.
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {  // requires Out@GRAD as input and X@GRAD as outputs
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "The gradient of Out should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")),
+                   "The gradient of X should not be null.");
+    ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));  // each X@GRAD takes the dims of the forward inputs X
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker,  // forward op + its maker, then the grad op
+            sequence_concat_grad, ops::SequenceConcatGradOp);
+REGISTER_OP_CPU_KERNEL(  // CPU kernel for the forward op
+    sequence_concat,
+    ops::SequenceConcatOpKernel);
+REGISTER_OP_CPU_KERNEL(  // CPU kernel for the gradient op
+    sequence_concat_grad,
+    ops::SequenceConcatGradOpKernel);
diff --git a/paddle/operators/sequence_concat_op.cu b/paddle/operators/sequence_concat_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8dc4764785871262d21a5631cc9e8b805ba84244
--- /dev/null
+++ b/paddle/operators/sequence_concat_op.cu
@@ -0,0 +1,25 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU  // defined before the include below; presumably selects Eigen's GPU code path — confirm
+
+#include "paddle/operators/sequence_concat_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(  // GPU kernel for the forward op
+    sequence_concat,
+    ops::SequenceConcatOpKernel);
+REGISTER_OP_GPU_KERNEL(  // GPU kernel for the gradient op
+    sequence_concat_grad,
+    ops::SequenceConcatGradOpKernel);
diff --git a/paddle/operators/sequence_concat_op.h b/paddle/operators/sequence_concat_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..a197a05bbb881806b24f9dcce5282a4d972e3adc
--- /dev/null
+++ b/paddle/operators/sequence_concat_op.h
@@ -0,0 +1,155 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/strided_memcpy.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; + +template +LoD concatLoD(const std::vector ins, const size_t axis, + const size_t level) { + auto out_lod = ins[0]->lod(); + const size_t n = ins.size(); + if (axis == 0UL) { + for (size_t i = 1; i < n; ++i) { + for (size_t j = 0; j < ins[i]->lod()[0].size(); ++j) { + out_lod[0][j] += ins[i]->lod()[0][j]; + } + + if (ins[0]->NumLevels() == 2) { + for (size_t j = 1; j < ins[i]->lod()[1].size(); ++j) { + if (level == 0UL) { + out_lod[1].push_back(out_lod[1].back() + ins[i]->lod()[1][j] - + ins[i]->lod()[1][j - 1]); + } else if (level == 1UL) { + out_lod[1][j] += ins[1]->lod()[1][j]; + } + } + } + } + } + return out_lod; +} + +template +class SequenceConcatOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto ins = ctx.MultiInput("X"); + auto* out = ctx.Output("Out"); + const size_t axis = static_cast(ctx.Attr("axis")); + const size_t level = static_cast(ctx.Attr("level")); + const size_t n = ins.size(); + + for (size_t i = 1; i < n; ++i) { + PADDLE_ENFORCE_EQ(ins[0]->NumLevels(), ins[i]->NumLevels(), + "The levels of all the input LoDTensors " + "should be the same."); + PADDLE_ENFORCE_EQ(ins[0]->dims().size(), ins[i]->dims().size(), + "The dimension size of all the input LoDTensors " + "should be the same."); + + const size_t dims_size = ins[i]->dims().size(); + for (size_t j = 0; j < dims_size; ++j) { + if (j == axis) continue; + PADDLE_ENFORCE_EQ(ins[0]->dims()[j], ins[i]->dims()[j], + "Except for the dimension of the specified " + "axis along which all the inputs are concatenated, " + "dimensions of all the other axises of the input " + "LoDTensors should be the same."); + } + } + PADDLE_ENFORCE_GT(ins[0]->NumLevels(), level, + "The 
levels of all the input LoDTensors "
+                      "should be greater than the specify level");
+
+    out->mutable_data(ctx.GetPlace());  // presumably allocates Out's buffer on the execution place — confirm
+    auto out_lod = concatLoD(ins, axis, level);
+    out->set_lod(out_lod);
+
+    auto out_lod_level = out_lod[level];  // offsets delimiting the output sequences at the chosen level
+    for (size_t i = 0; i < out_lod_level.size() - 1; ++i) {  // one iteration per output sequence
+      Tensor out_t = out->Slice(static_cast(out_lod_level[i]),
+                                static_cast(out_lod_level[i + 1]));
+      auto out_stride = framework::stride(out_t.dims());
+      size_t offset = 0;  // element offset inside out_t where the next input slice is written
+
+      for (size_t j = 0; j < n; ++j) {
+        auto in_lod_level = ins[j]->lod()[level];
+        auto in_stride = framework::stride(ins[j]->dims());
+        Tensor in_t = ins[j]->Slice(static_cast(in_lod_level[i]),  // i-th sequence of the j-th input
+                                    static_cast(in_lod_level[i + 1]));
+        size_t axis_dim = in_t.dims()[axis];
+        StridedMemcpy(ctx.device_context(), in_t.data(), in_stride,
+                      in_t.dims(), out_stride, out_t.data() + offset);
+        offset += axis_dim * in_stride[axis];  // advance by the extent just copied along `axis`
+      }
+    }
+  }
+};
+
+template
+class SequenceConcatGradOpKernel : public framework::OpKernel {  // Backward kernel: copies slices of Out@GRAD into each X@GRAD.
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto ins = ctx.MultiInput("X");
+    auto* out_grad =
+        ctx.Input(framework::GradVarName("Out"));
+    auto x_grads =
+        ctx.MultiOutput(framework::GradVarName("X"));
+    size_t axis = static_cast(ctx.Attr("axis"));
+    size_t level = static_cast(ctx.Attr("level"));
+    const size_t n = x_grads.size();
+
+    // Set Grad(X) LoD as X
+    for (size_t i = 0; i < n; i++) {
+      x_grads[i]->set_lod(ins[i]->lod());
+      x_grads[i]->mutable_data(ctx.GetPlace());
+    }
+
+    auto out_lod = concatLoD(ins, axis, level);  // recomputed from the forward inputs, not read from Out@GRAD
+    auto out_lod_level = out_lod[level];
+
+    for (size_t i = 0; i < out_lod_level.size() - 1; ++i) {  // one iteration per output sequence
+      Tensor out_grad_t =
+          out_grad->Slice(static_cast(out_lod_level[i]),
+                          static_cast(out_lod_level[i + 1]));
+      auto out_grad_stride = framework::stride(out_grad_t.dims());
+      size_t offset = 0;  // element offset inside out_grad_t of the next slice to read
+
+      for (size_t j = 0; j < n; ++j) {
+        auto x_grad_lod_level = x_grads[j]->lod()[level];
+        auto x_grad_stride = framework::stride(x_grads[j]->dims());
+
Tensor x_grad_t =
+            x_grads[j]->Slice(static_cast(x_grad_lod_level[i]),  // i-th sequence of the j-th input gradient
+                              static_cast(x_grad_lod_level[i + 1]));
+        size_t axis_dim = x_grad_t.dims()[axis];
+        StridedMemcpy(ctx.device_context(), out_grad_t.data() + offset,  // source is Out@GRAD at the running offset
+                      out_grad_stride, out_grad_t.dims(), x_grad_stride,
+                      x_grad_t.data());
+        offset += axis_dim * out_grad_stride[axis];  // advance by the extent just copied along `axis`
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_seq_concat_op.py b/python/paddle/v2/framework/tests/test_seq_concat_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..6309b09bc98f6d529f80bfa269a0eaadd799fcbc
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_seq_concat_op.py
@@ -0,0 +1,77 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestConcatOp(OpTest):
+    def set_data(self):
+        # two level, batch size is 3
+        x0 = np.random.random((4, 6, 3)).astype('float32')
+        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
+        x1 = np.random.random((4, 8, 3)).astype('float32')
+        lod1 = [[0, 2, 4], [0, 1, 2, 3, 4]]
+        axis = 1
+        level = 1
+        self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
+        self.attrs = {'axis': axis, 'level': level}
+        outs = []
+        for i in range(4):
+            sub_x0 = x0[lod0[level][i]:lod0[level][i + 1], :]
+            sub_x1 = x1[lod1[level][i]:lod1[level][i + 1], :]
+            outs.append(np.concatenate((sub_x0, sub_x1), axis=axis))
+
+        self.outputs = {'Out': np.concatenate(outs, axis=0)}
+
+    def setUp(self):
+        self.op_type = "sequence_concat"
+        self.set_data()
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['x0'], 'Out')
+
+
+class TestConcatOpDiffLod(TestConcatOp):
+    def set_data(self):
+        # two level, batch size is 3
+        x0 = np.random.random((4, 6, 3)).astype('float32')
+        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
+        x1 = np.random.random((5, 6, 3)).astype('float32')
+        lod1 = [[0, 3, 5], [0, 1, 2, 3, 5]]
+        axis = 0
+        level = 1
+        self.inputs = {'X': [('x0', (x0, lod0)), 
('x1', (x1, lod1))]}
+        self.attrs = {'axis': axis, 'level': level}
+        outs = []
+        for i in range(4):  # one slice per consecutive pair of offsets in lod0[level]
+            sub_x0 = x0[lod0[level][i]:lod0[level][i + 1], :]
+            sub_x1 = x1[lod1[level][i]:lod1[level][i + 1], :]
+            outs.append(np.concatenate((sub_x0, sub_x1), axis=axis))  # expected: i-th sequences of x0 and x1 joined
+
+        self.outputs = {'Out': np.concatenate(outs, axis=0)}
+
+
+class TestConcatOpLevelZero(TestConcatOp):
+    def set_data(self):
+        # two-level LoD; lod0[0] = [0, 2, 4] delimits 2 level-0 sequences
+        x0 = np.random.random((4, 3, 4)).astype('float32')
+        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
+        x1 = np.random.random((5, 3, 4)).astype('float32')
+        lod1 = [[0, 3, 5], [0, 1, 3, 4, 5]]
+        axis = 0
+        level = 0
+        self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
+        self.attrs = {'axis': axis, 'level': level}
+        outs = []
+        for i in range(2):  # one slice per consecutive pair of offsets in lod0[level]
+            sub_x0 = x0[lod0[level][i]:lod0[level][i + 1], :]
+            sub_x1 = x1[lod1[level][i]:lod1[level][i + 1], :]
+            outs.append(np.concatenate((sub_x0, sub_x1), axis=axis))  # expected: i-th sequences of x0 and x1 joined
+
+        self.outputs = {'Out': np.concatenate(outs, axis=0)}
+
+
+if __name__ == '__main__':
+    unittest.main()