diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc
old mode 100755
new mode 100644
index d815a3eeb4d81c70f7eb6ab729afee6b04ffe12f..5f2925784e42442a1d42dbf1fcb34ee2c39a41c4
--- a/paddle/fluid/operators/activation_op_npu.cc
+++ b/paddle/fluid/operators/activation_op_npu.cc
@@ -386,6 +386,35 @@ class SquareNPUKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename DeviceContext, typename T>
+class SquareGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+
+    auto factor = static_cast<float>(2.0);
+
+    auto place = ctx.GetPlace();
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+    // Step 1: Compute x_muls_factor = factor * x
+    Tensor x_muls_factor(x->type());
+    x_muls_factor.mutable_data<T>(x->dims(), place);
+    const auto& runner_muls_1 =
+        NpuOpRunner("Muls", {*x}, {x_muls_factor}, {{"value", factor}});
+    runner_muls_1.Run(stream);
+
+    // Step 2: Compute dx = dout * factor * x
+    dx->mutable_data<T>(place);
+    const auto& runner_mul_2 =
+        NpuOpRunner("Mul", {*dout, x_muls_factor}, {*dx}, {});
+    runner_mul_2.Run(stream);
+  }
+};
+
 template <typename DeviceContext, typename T>
 class SigmoidNPUKernel : public framework::OpKernel<T> {
  public:
@@ -869,6 +898,12 @@ REGISTER_OP_NPU_KERNEL(
                          paddle::platform::float16>,
     ops::SquareNPUKernel<paddle::platform::NPUDeviceContext, int>);
 
+REGISTER_OP_NPU_KERNEL(
+    square_grad,
+    ops::SquareGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::SquareGradNPUKernel<paddle::platform::NPUDeviceContext,
+                             paddle::platform::float16>);
+
 REGISTER_OP_NPU_KERNEL(
     sigmoid, ops::SigmoidNPUKernel<paddle::platform::NPUDeviceContext, float>,
     ops::SigmoidNPUKernel<paddle::platform::NPUDeviceContext,
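
For reference, the math the new kernel implements is the chain rule for y = x²: dx = dout · 2x. The NPU "Muls" runner multiplies a tensor by a scalar attribute and "Mul" does an elementwise tensor product, so the kernel first forms 2·x on device and then multiplies it by the upstream gradient. Below is a minimal standalone C++ sketch of that elementwise math, using plain std::vector with no Paddle or NPU dependencies; the square_grad helper name is ours, not part of the patch:

```cpp
#include <cstdio>
#include <vector>

// Standalone sketch of the gradient the kernel computes on device:
// for y = x * x, the chain rule gives dx = dout * (2 * x).
std::vector<float> square_grad(const std::vector<float>& x,
                               const std::vector<float>& dout) {
  std::vector<float> dx(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    // Step 1 ("Muls"): scale x by the constant factor 2.
    // Step 2 ("Mul"):  elementwise product with the upstream gradient.
    dx[i] = dout[i] * (2.0f * x[i]);
  }
  return dx;
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f};
  std::vector<float> dout = {1.0f, 1.0f, 1.0f};  // upstream gradient of ones
  for (float g : square_grad(x, dout)) std::printf("%g ", g);  // prints: 2 4 6
  std::printf("\n");
  return 0;
}
```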