diff --git a/paddle/fluid/operators/elementwise_mul_op.h b/paddle/fluid/operators/elementwise_mul_op.h index 46d69ed87d4c2eeae674730b00858b6358b97773..e2b59b31120964d49d2de29ed327ffe0e6d44ddc 100644 --- a/paddle/fluid/operators/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise_mul_op.h @@ -40,80 +40,14 @@ class ElementwiseMulKernel : public framework::OpKernel { }; template -struct ElementwiseMulGradFunctor { - template - void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) { - auto x_e = framework::EigenVector::Flatten(*x); - auto y_e = framework::EigenVector::Flatten(*y); - auto dz_e = framework::EigenVector::Flatten(*dz); - - if (dx) { - auto dx_e = framework::EigenVector::Flatten(*dx); - dx_e.device(d) = dz_e * y_e; - } - - if (dy) { - auto dy_e = framework::EigenVector::Flatten(*dy); - dy_e.device(d) = x_e * dz_e; - } - } +struct IdentityGrad_DX { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { return dout * y; } }; template -struct ElementwiseMulBroadCastGradFunctor { - template - void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) { - auto x_e = framework::EigenVector::Flatten(*x); - auto y_e = framework::EigenVector::Flatten(*y); - auto dz_e = framework::EigenVector::Flatten(*dz); - - auto y_e_bcast = y_e.reshape(Eigen::DSizes(1, n)) - .broadcast(Eigen::DSizes(pre, 1)) - .reshape(Eigen::DSizes(x_e.size())); - - if (dx) { - auto dx_e = framework::EigenVector::Flatten(*dx); - dx_e.device(d) = dz_e * y_e_bcast; - } - - if (dy) { - auto dy_e = framework::EigenVector::Flatten(*dy); - dy_e.device(d) = (x_e * dz_e) - .reshape(Eigen::DSizes(pre, n)) - .sum(Eigen::array{{0}}); - } - } +struct IdentityGrad_DY { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { return dout * x; } }; - -template -struct ElementwiseMulBroadCast2GradFunctor { - template - void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n, - Post post) { - auto x_e = framework::EigenVector::Flatten(*x); - auto y_e = framework::EigenVector::Flatten(*y); - auto dz_e = framework::EigenVector::Flatten(*dz); - - auto y_e_bcast = y_e.reshape(Eigen::DSizes(1, n, 1)) - .broadcast(Eigen::DSizes(pre, 1, post)) - .reshape(Eigen::DSizes(x_e.size())); - if (dx) { - auto dx_e = framework::EigenVector::Flatten(*dx); - dx_e.device(d) = dz_e * y_e_bcast; - } - - if (dy) { - auto dy_e = framework::EigenVector::Flatten(*dy); - dy_e.device(d) = (x_e * dz_e) - .reshape(Eigen::DSizes(pre, n, post)) - .sum(Eigen::array{{0, 2}}); - } - } -}; - template class ElementwiseMulGradKernel : public framework::OpKernel { public: @@ -127,12 +61,11 @@ class ElementwiseMulGradKernel : public framework::OpKernel { auto* dx = ctx.Output(framework::GradVarName("X")); auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); - ElementwiseGradCompute, - ElementwiseMulBroadCastGradFunctor, - ElementwiseMulBroadCast2GradFunctor>( - ctx, x, y, out, dout, axis, dx, dy); + ElemwiseGradCompute, + IdentityGrad_DY>(ctx, *x, *y, *out, *dout, axis, dx, + dy, IdentityGrad_DX(), + IdentityGrad_DY()); } }; - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/elementwise_op_function.h b/paddle/fluid/operators/elementwise_op_function.h index ffda53a383ced411415e528886a23f28f6a62648..0b4238436ffcc586fe8bc7abbe4cfbc1654dcb88 100644 --- a/paddle/fluid/operators/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise_op_function.h @@ -301,7 +301,7 @@ struct ElemwiseGradNoBroadcast { dx_[i] = dx_op_(x_[i], y_[i], out_[i], dout_[i]); } if (dy_ != nullptr) { - dy_[i] = dx_op_(x_[i], y_[i], out_[i], dout_[i]); + dy_[i] = dy_op_(x_[i], y_[i], out_[i], dout_[i]); } }