From a56142c1550c5338c942f0c88e3ad96cc0305adc Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 16 Aug 2018 17:01:54 +0800 Subject: [PATCH] optimize elementwise_mul cpu forward --- paddle/fluid/operators/elementwise_mul_op.h | 40 +++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/elementwise_mul_op.h b/paddle/fluid/operators/elementwise_mul_op.h index dc73cb6f236..329d2d129a9 100644 --- a/paddle/fluid/operators/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise_mul_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/operators/elementwise_op_function.h" +#include "paddle/fluid/operators/math/blas.h" namespace paddle { namespace operators { @@ -23,6 +24,37 @@ struct MulFunctor { inline HOSTDEVICE T operator()(T a, T b) const { return a * b; } }; +template +void default_elementwise_mul(const framework::ExecutionContext& ctx, + const framework::Tensor* x, + const framework::Tensor* y, framework::Tensor* z) { + int axis = ctx.Attr("axis"); + ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, + MulFunctor(), z); +} + +template +typename std::enable_if< + std::is_floating_point::value && + std::is_same::value>::type +elementwise_mul(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + framework::Tensor* z) { + auto blas = math::GetBlas(ctx); + blas.VMUL(x->numel(), x->data(), y->data(), + z->mutable_data(ctx.GetPlace())); +} + +template +typename std::enable_if< + !std::is_floating_point::value || + !std::is_same::value>::type +elementwise_mul(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + framework::Tensor* z) { + default_elementwise_mul(ctx, x, y, z); +} + template class ElementwiseMulKernel : public framework::OpKernel { public: @@ -33,9 +65,11 @@ class ElementwiseMulKernel : public framework::OpKernel { auto* y = ctx.Input("Y"); auto* z = ctx.Output("Out"); z->mutable_data(ctx.GetPlace()); - int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - MulFunctor(), z); + if (x->numel() == y->numel()) { + elementwise_mul(ctx, x, y, z); + } else { + default_elementwise_mul(ctx, x, y, z); + } } }; -- GitLab