diff --git a/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..90aba5fe89d79b8aa7008bc803b0a646e93bd0fc --- /dev/null +++ b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc @@ -0,0 +1,201 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h" +#include +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/cpu_vec.h" +#include "paddle/fluid/operators/math/fc_compute.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { + +void FusionSeqExpandConcatFCOp::InferShape( + framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE_GT( + ctx->Inputs("X").size(), 1UL, + "Inputs(X) of FusionSeqExpandConcatFCOp should larger than 1."); + PADDLE_ENFORCE( + ctx->HasInput("FCWeight"), + "Input(FCWeight) of FusionSeqExpandConcatFCOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("Out"), + "Output(Out) of FusionSeqExpandConcatFCOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("FCOut"), + "Output(FCOut) of FusionSeqExpandConcatFCOp should not be null."); + + auto ins_dims = ctx->GetInputsDim("X"); + auto w_dims = ctx->GetInputDim("FCWeight"); // (M0+M1+M2+..) x D + PADDLE_ENFORCE_EQ(w_dims.size(), 2UL, "Input(FCWeight)'s rank must be 2."); + const int D = w_dims[1]; + int sum = ins_dims[0][1]; + for (size_t i = 1; i < ins_dims.size(); ++i) { + sum += ins_dims[i][1]; + } + PADDLE_ENFORCE_EQ(sum, w_dims[0], + "FC height should be sum of all inputs width."); + if (ctx->HasInput("FCBias")) { + auto b_dims = ctx->GetInputDim("FCBias"); + PADDLE_ENFORCE_EQ(b_dims.size(), 2, "Input(FCBias)'s rank must be 2."); + PADDLE_ENFORCE_EQ(b_dims[0], 1, "FCBias shapes must be 1 * %d.", D); + PADDLE_ENFORCE_EQ(b_dims[1], D, "FCBias shapes must be 1 * %d.", D); + } + + ctx->SetOutputDim("Out", {ins_dims[0][0], D}); + // fcout should be reshape when run since can not get lod in infershape + // explicit share the ref lod + ctx->ShareLoD("X", "Out", 0); +} + +framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + return framework::OpKernelType( + framework::ToDataType(ctx.MultiInput("X")[0]->type()), + ctx.device_context()); +} + +void FusionSeqExpandConcatFCOpMaker::Make() { + AddInput("X", + "(LoDTensor) input LodDTensors, the first one must be have ref lod " + "for sequence expand, and the rest input should have same lod.") + .AsDuplicable(); + AddInput("FCWeight", "(Tensor) the weights of fc."); + AddInput("FCBias", "(Tensor, optional) the bias of fc.").AsDispensable(); + AddOutput("Out", "(LoDTensor) Output LodTensor."); + AddOutput( + "FCOut", + "(Tensor) the intermediate tensor to keep the result of fc." + "Shape is (N x D), where N is the batch size, D is the output dim of fc") + .AsIntermediate(); + AddAttr("fc_activation", + "(string, default: identity)" + "The activation for the result of fc." + "`identity` by default.") + .SetDefault("identity") + .InEnum({"sigmoid", "tanh", "relu", "identity"}); + AddComment(R"DOC( +Fusion Sequence expand + concat + fc Operator. + +All below conditions should be meet: + +The ref_level of seq_expand should be 0. + +The ref lod of seq_expand level is the first input of concat. + +The other inputs should have same lod and same batch size of ref lod. + +The seq len of other inputs should be 1. + +The concat axis should be 1. + +)DOC"); +} + +template +class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + using DeviceContext = paddle::platform::CPUDeviceContext; + auto ins = ctx.MultiInput("X"); + auto* w = ctx.Input("FCWeight"); + auto* b = ctx.Input("FCBias"); + auto* out = ctx.Output("Out"); + auto* fc_out = ctx.Output("FCOut"); + + auto* ref_in = ins[0]; + auto ref_lod = ref_in->lod(); + auto in1_lod = ins[1]->lod(); + auto ref_dims = ref_in->dims(); // T x M0 + auto in1_dims = ins[1]->dims(); // N x M1 + auto w_dims = w->dims(); + const int N = ref_lod[0].size() - 1; + const int total_T = ref_dims[0]; + const int M0 = ref_dims[1]; + const int M1 = in1_dims[1]; + const int D = w_dims[1]; + + // some check and fcout should be reshape here + // since infershape can not get lod info + PADDLE_ENFORCE_EQ(ref_lod.size(), 1UL, "Only support input lod size is 1."); + PADDLE_ENFORCE_EQ(in1_lod.size(), 1UL, "Only support input lod size is 1."); + PADDLE_ENFORCE_EQ(in1_lod[0].size() - 1, N, + "Batch size of all inputs should be equal."); + PADDLE_ENFORCE_EQ(in1_lod[0][N], N, + "Seq_length of other inputs should be 1."); + PADDLE_ENFORCE_EQ(in1_dims[0], N, "input height should be batch size."); + for (size_t i = 2; i < ins.size(); ++i) { + PADDLE_ENFORCE_EQ(ins[i]->dims()[0], N, + "All other inputs height should be equal"); + PADDLE_ENFORCE_EQ(ins[i]->lod(), in1_lod, + "All other inputs should have same lod"); + } + fc_out->Resize({N, D}); + + std::function fc_act; + auto& fc_act_str = ctx.Attr("fc_activation"); + if (platform::jit::MayIUse(platform::jit::avx)) { + math::VecActivations act_functor; + fc_act = act_functor(fc_act_str); + } else { + math::VecActivations act_functor; + fc_act = act_functor(fc_act_str); + } + + const T* ref_in_data = ref_in->data(); + const T* in1_data = ins[1]->data(); + const T* w_data = w->data(); + T* out_data = out->mutable_data(ctx.GetPlace()); + T* fc_out_data = fc_out->mutable_data(ctx.GetPlace()); + + auto blas = math::GetBlas(ctx); + math::FCCompute(blas, total_T, D, M0, ref_in_data, w_data, + out_data, b ? b->data() : NULL); + w_data = w_data + M0 * D; + // first write on + blas.MatMul(N, D, M1, in1_data, w_data, fc_out_data); + w_data = w_data + M1 * D; + for (size_t i = 2; i < ins.size(); ++i) { + // add on + const T* in_data = ins[i]->data(); + const int K = ins[i]->dims()[1]; + blas.GEMM(CblasNoTrans, CblasNoTrans, N, D, K, static_cast(1), in_data, + K, w_data, D, static_cast(1), fc_out_data, D); + w_data = w_data + K * D; + } + T* cur_out_data = out_data; + for (int i = 0; i < N; ++i) { + int seq_len = ref_lod[0][i + 1] - ref_lod[0][i]; + T* src = fc_out_data + i * D; + for (int step = 0; step < seq_len; ++step) { + blas.VADD(D, cur_out_data, src, cur_out_data); + cur_out_data = cur_out_data + D; + } + } + fc_act(total_T * D, out_data, out_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(fusion_seqexpand_concat_fc, ops::FusionSeqExpandConcatFCOp, + ops::FusionSeqExpandConcatFCOpMaker, + paddle::framework::DefaultGradOpDescMaker); + +REGISTER_OP_CPU_KERNEL(fusion_seqexpand_concat_fc, + ops::FusionSeqExpandConcatFCOpKernel, + ops::FusionSeqExpandConcatFCOpKernel); diff --git a/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f78e820f603354944bd7fc23aff2d1d72e5ba750 --- /dev/null +++ b/paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; +using Tensor = framework::Tensor; + +class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusionSeqExpandConcatFCOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py new file mode 100644 index 0000000000000000000000000000000000000000..aeee3a9999a94b4979fc3793150101352e50be85 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py @@ -0,0 +1,139 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +from test_fusion_lstm_op import fc, ACTIVATION + + +def fusion_seqexpand_concat_fc(xs, lod, w, b, fc_act): + + T = sum(lod[0]) + N = len(lod[0]) + num_inputs = len(xs) + D = w.shape[1] + + expanded_inputs = [xs[0]] + for i in range(num_inputs - 1): + x = xs[i + 1] + assert x.shape[0] == N + expanded = np.repeat(x, lod[0], axis=0) + assert expanded.shape[0] == T + assert expanded.shape[1] == x.shape[1] + expanded_inputs.append(expanded) + + fc_input = np.concatenate(expanded_inputs, axis=1) + assert fc_input.shape[0] == T + assert fc_input.shape[1] == w.shape[0] + fc_out = fc(fc_input, w, b) + fc_out = fc_act(fc_out) + assert fc_out.shape[0] == T + assert fc_out.shape[1] == D + return fc_out + + +class TestFusionSeqExpandConcatFCOp(OpTest): + def set_conf(self): + pass + + def setUp(self): + self.op_type = 'fusion_seqexpand_concat_fc' + self.lod = [[3, 5, 8, 2]] + self.inputs_M = [15, 10, 10] + self.D = 20 + self.with_bias = True + self.fc_act = 'relu' + self.set_conf() + + T = sum(self.lod[0]) + bs = len(self.lod[0]) + num_inputs = len(self.inputs_M) + + x0 = np.random.normal(size=(T, self.inputs_M[0])).astype('float32') + xs = [x0] + for i in range(num_inputs - 1): + xi = np.random.normal(size=(bs, + self.inputs_M[i + 1])).astype('float32') + xs.append(xi) + + # fc weight and bias + w = np.random.normal(size=(sum(self.inputs_M), + self.D)).astype('float32') + b = np.random.normal(size=( + 1, self.D)).astype('float32') if self.with_bias else np.zeros( + (1, self.D)).astype('float32') + + out = fusion_seqexpand_concat_fc(xs, self.lod, w, b, + ACTIVATION[self.fc_act]) + + self.inputs = {'X': [('x0', (x0, self.lod))], 'FCWeight': w} + normal_lod = [[1] * bs] + for i in range(num_inputs - 1): + self.inputs['X'].append(('x%d' % (i + 1), (xs[i + 1], normal_lod))) + + if self.with_bias: + self.inputs['FCBias'] = b + + self.outputs = {'Out': (out, self.lod)} + self.attrs = {'fc_activation': self.fc_act} + + def test_check_output(self): + self.check_output() + + +class TestFusionSECFCOpNonBias(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.with_bias = False + + +class TestFusionSECFCOpNonAct(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.fc_act = 'identity' + + +class TestFusionSECFCOpMD1(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.inputs_M = [3, 4, 2, 1, 5] + self.D = 8 + + +class TestFusionSECFCOpMD2(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.lod = [[5, 6]] + self.inputs_M = [1, 1] + + +class TestFusionSECFCOpBS1_1(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.lod = [[1]] + self.inputs_M = [3, 4, 2] + + +class TestFusionSECFCOpBS1_2(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.lod = [[1]] + self.inputs_M = [3, 4] + + +class TestFusionSECFCOpBS1_3(TestFusionSeqExpandConcatFCOp): + def set_conf(self): + self.lod = [[5]] + self.inputs_M = [6, 3] + + +if __name__ == '__main__': + unittest.main()