From f3cd2612aeda96377cd8822001489d6ec173ef37 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Thu, 16 Aug 2018 00:01:27 +0800
Subject: [PATCH] refine fc and use the fc compute in fusion_lstm

---
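The new math::FCCompute helper factors out the GEMM-plus-bias pattern that
fc_op and fusion_lstm previously each implemented by hand: one blas.GEMM
computes Y = X * W (X is M x K, W is K x N), and, when a bias is given, one
blas.AXPY per output row adds B to that row, parallelized with OpenMP when
built with MKLML. As a minimal standalone sketch of those semantics (plain
loops stand in for the BLAS calls; fc_reference and the sample values below
are illustrative only, not part of the patch):

    #include <cstdio>
    #include <vector>

    // Reference semantics of FCCompute: Y = X * W, then broadcast-add the
    // bias B to every row of Y. X is M x K, W is K x N, Y is M x N, and B
    // has length N (the bias may be omitted).
    void fc_reference(int M, int N, int K, const float* X, const float* W,
                      float* Y, const float* B = nullptr) {
      for (int i = 0; i < M; ++i) {
        for (int j = 0; j < N; ++j) {
          float acc = 0.f;
          for (int k = 0; k < K; ++k) acc += X[i * K + k] * W[k * N + j];
          Y[i * N + j] = acc + (B ? B[j] : 0.f);
        }
      }
    }

    int main() {
      // A 2x3 input times 3x2 weights, plus a length-2 bias.
      std::vector<float> X = {1, 2, 3, 4, 5, 6};
      std::vector<float> W = {1, 0, 0, 1, 1, 1};
      std::vector<float> B = {10, 20};
      std::vector<float> Y(4);
      fc_reference(2, 2, 3, X.data(), W.data(), Y.data(), B.data());
      for (float y : Y) std::printf("%.0f ", y);  // prints: 14 25 20 31
      return 0;
    }

fusion_lstm reuses the same helper for its input-to-hidden projection,
choosing whether to run the projection before or after the sequence-to-batch
reorder depending on which of the input and gate widths is larger.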
 paddle/fluid/operators/fc_op.cc          | 31 +++----------
 paddle/fluid/operators/fusion_lstm_op.cc | 30 +++---------
 paddle/fluid/operators/math/fc_compute.h | 43 +++++++++++++++++++
 .../fluid/tests/unittests/test_fc_op.py  | 34 ++++++++++++---
 4 files changed, 83 insertions(+), 55 deletions(-)
 create mode 100644 paddle/fluid/operators/math/fc_compute.h

diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc
index 099ca52c8e9..5bee0e480ed 100644
--- a/paddle/fluid/operators/fc_op.cc
+++ b/paddle/fluid/operators/fc_op.cc
@@ -15,8 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/fc_op.h"
 #include <vector>
 #include "paddle/fluid/operators/math/blas.h"
-
-DECLARE_int32(paddle_num_threads);
+#include "paddle/fluid/operators/math/fc_compute.h"
 
 namespace paddle {
 namespace operators {
@@ -110,13 +109,8 @@ void FCOpMaker::Make() {
   AddComment(R"DOC(
   Fully Connected Operator.
 
-  The fully connected operation calculates the output based on the input, weights and bias attribute.
-  The size of each dimension of the parameters checked in the infer-shape.
-  The matrix of bias is generated by the mkldnn framework, when the bias_attr is True.
-  Additional parametrs are use_mkldnn and bias_attr.
-  The input(X) size and output(Out) size may be diffrent.
-
-  The fully connected layer only supports MKLDNN version
+  The fully connected operation calculates the output based on the input, weights and bias.
+  The size of each dimension of the parameters is checked during infer-shape.
 )DOC");
 }
 
@@ -133,26 +127,13 @@ class FCOpKernel : public framework::OpKernel<T> {
     auto in_dims = input->dims();
     auto w_dims = w->dims();
 
-    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
-    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(dev_ctx);
     const T* input_data = input->data<T>();
     const T* w_data = w->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
-
-    blas.GEMM(CblasNoTrans, CblasNoTrans, in_dims[0], w_dims[1], w_dims[0],
-              static_cast<T>(1), input_data, w_data, static_cast<T>(0),
-              output_data);
-
-    if (bias) {
-      const T* bias_data = bias->data<T>();
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
-#endif
-      for (int bs = 0; bs < in_dims[0]; bs++) {
-        blas.AXPY(w_dims[1], static_cast<T>(1), bias_data,
-                  output_data + bs * w_dims[1]);
-      }
-    }
+    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
+    math::FCCompute<platform::CPUDeviceContext, T>(
+        blas, in_dims[0], w_dims[1], w_dims[0], input_data, w_data,
+        output_data, bias ? bias->data<T>() : NULL);
   }
 };
 
diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc
index fc2272acd4d..b937f2edf19 100644
--- a/paddle/fluid/operators/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fusion_lstm_op.cc
@@ -16,9 +16,9 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
+#include "paddle/fluid/operators/math/fc_compute.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
 #include "paddle/fluid/operators/math/sequence2batch.h"
-DECLARE_int32(paddle_num_threads);
 
 namespace paddle {
 namespace operators {
@@ -205,23 +205,6 @@ inline void ReorderInitState(const DeviceContext& ctx,
   row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }
 
-// TODO(TJ): can move to math::details
-template <typename DeviceContext, typename T>
-inline void SimpleFC(const math::BlasT<DeviceContext, T>& blas, const int M,
-                     const int N, const int K, const T* A, const T* B, T* C,
-                     const T* bias_data = NULL) {
-  blas.GEMM(CblasNoTrans, CblasNoTrans, M, N, K, static_cast<T>(1), A, B,
-            static_cast<T>(0), C);
-  if (bias_data) {
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
-#endif
-    for (int i = 0; i < M; i++) {
-      blas.AXPY(N, static_cast<T>(1), bias_data, C + i * N);
-    }
-  }
-}
-
 template <typename DeviceContext, typename T>
 class FuisonLSTMKernel : public framework::OpKernel<T> {
  public:
@@ -253,14 +236,15 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
     if (x_dims[1] > wx_dims[1]) {
-      SimpleFC<DeviceContext, T>(blas, x_dims[0], wx_dims[1], x_dims[1], x_data,
-                                 wx_data, xx_data, bias->data<T>());
+      math::FCCompute<DeviceContext, T>(blas, x_dims[0], wx_dims[1], x_dims[1],
+                                        x_data, wx_data, xx_data,
+                                        bias->data<T>());
       to_batch(dev_ctx, *xx, batched_gate, true, is_reverse);
     } else {
       to_batch(dev_ctx, *x, xx, true, is_reverse);
-      SimpleFC<DeviceContext, T>(blas, x_dims[0], wx_dims[1], x_dims[1],
-                                 xx_data, wx_data, batched_gate_data,
-                                 bias->data<T>());
+      math::FCCompute<DeviceContext, T>(blas, x_dims[0], wx_dims[1], x_dims[1],
+                                        xx_data, wx_data, batched_gate_data,
+                                        bias->data<T>());
     }
 
     int frame_size = static_cast<int>(wx_dims[1] / 4);
diff --git a/paddle/fluid/operators/math/fc_compute.h b/paddle/fluid/operators/math/fc_compute.h
new file mode 100644
index 00000000000..8600fa9e2c4
--- /dev/null
+++ b/paddle/fluid/operators/math/fc_compute.h
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/operators/math/blas.h"
+
+DECLARE_int32(paddle_num_threads);
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+template <typename DeviceContext, typename T>
+inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
+                      const int N, const int K, const T* X, const T* W, T* Y,
+                      const T* B = NULL) {
+  blas.GEMM(CblasNoTrans, CblasNoTrans, M, N, K, static_cast<T>(1), X, W,
+            static_cast<T>(0), Y);
+  if (B) {
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
+#endif
+    for (int i = 0; i < M; i++) {
+      blas.AXPY(N, static_cast<T>(1), B, Y + i * N);
+    }
+  }
+}
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/fluid/tests/unittests/test_fc_op.py b/python/paddle/fluid/tests/unittests/test_fc_op.py
index 2bb920710a9..ff417ad2f16 100644
--- a/python/paddle/fluid/tests/unittests/test_fc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fc_op.py
@@ -64,27 +64,47 @@ class TestFCOp(OpTest):
         self.check_output()
 
 
-class TestFCOpBiasBoth(TestFCOp):
+class TestFCOpNoBias(TestFCOp):
     def init_shapes(self, mb, ic, oc, h, w):
-        for with_bias in {True, False}:
-            self.with_bias = with_bias
-            self.matrix = MatrixGenerate(mb, ic, oc, h, w)
+        self.with_bias = False
+        self.matrix = MatrixGenerate(mb, ic, oc, h, w)
 
 
-class TestFCOp1(TestFCOpBiasBoth):
+class TestFCOpWithBias(TestFCOp):
+    def init_shapes(self, mb, ic, oc, h, w):
+        self.with_bias = True
+        self.matrix = MatrixGenerate(mb, ic, oc, h, w)
+
+
+class TestFCOp1(TestFCOpNoBias):
     def init_op_type(self):
         self.init_shapes(2, 8, 10, 1, 1)
 
 
-class TestFCOp2(TestFCOpBiasBoth):
+class TestFCOp2(TestFCOpNoBias):
     def init_op_type(self):
         self.init_shapes(4, 5, 6, 2, 2)
 
 
-class TestFCOp4(TestFCOpBiasBoth):
+class TestFCOp4(TestFCOpNoBias):
     def init_op_type(self):
         self.init_shapes(1, 32, 64, 3, 3)
 
 
+class TestFCOpWithBias1(TestFCOpWithBias):
+    def init_op_type(self):
+        self.init_shapes(3, 8, 10, 2, 1)
+
+
+class TestFCOpWithBias2(TestFCOpWithBias):
+    def init_op_type(self):
+        self.init_shapes(4, 5, 6, 2, 2)
+
+
+class TestFCOpWithBias3(TestFCOpWithBias):
+    def init_op_type(self):
+        self.init_shapes(1, 64, 32, 3, 3)
+
+
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab