diff --git a/paddle/fluid/operators/p_norm_op_npu.cc b/paddle/fluid/operators/p_norm_op_npu.cc
index 3c5d1a36e9c273f3a5b50d445b47dcb80dd79266..ef2346204b9c0f62dee09f8c82c69503ea23f817 100644
--- a/paddle/fluid/operators/p_norm_op_npu.cc
+++ b/paddle/fluid/operators/p_norm_op_npu.cc
@@ -81,6 +81,122 @@ class PnormNPUKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename DeviceContext, typename T>
+class PnormGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    using Tensor = framework::Tensor;
+    auto* x = ctx.Input<Tensor>("X");
+    auto* y = ctx.Input<Tensor>("Out");
+    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+
+    auto place = ctx.GetPlace();
+    dx->mutable_data<T>(place);
+
+    auto xdim = x->dims();
+    float porder = ctx.Attr<float>("porder");
+    bool keepdim = ctx.Attr<bool>("keepdim");
+
+    int axis = ctx.Attr<int>("axis");
+    axis = axis < 0 ? xdim.size() + axis : axis;
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    Tensor y_share(y->type());
+    Tensor dy_share(dy->type());
+    y_share.ShareDataWith(*y);
+    dy_share.ShareDataWith(*dy);
+    auto ydim = xdim;
+    if (!keepdim) {
+      ydim[axis] = 1;
+    } else {
+      ydim = y->dims();
+    }
+    y_share.Resize(ydim);
+    dy_share.Resize(ydim);
+
+    if (porder == 0) {
+      FillNpuTensorWithConstant<T>(dx, static_cast<T>(0));
+      dx->Resize(xdim);
+    } else if (porder == INFINITY || porder == -INFINITY) {
+      Tensor x_abs;
+      x_abs.mutable_data<T>(xdim, place);
+      const auto& r_abs = NpuOpRunner("Abs", {*x}, {x_abs}, {});
+      r_abs.Run(stream);
+
+      Tensor t_cond;
+      t_cond.mutable_data<bool>(xdim, place);
+      const auto& r_equal =
+          NpuOpRunner("Equal", {x_abs, y_share}, {t_cond}, {});
+      r_equal.Run(stream);
+
+      Tensor t_zero;
+      t_zero.mutable_data<T>({1}, place);
+      FillNpuTensorWithConstant<T>(&t_zero, static_cast<T>(0));
+
+      Tensor x_sign;
+      x_sign.mutable_data<T>(xdim, place);
+      const auto& r_sign = NpuOpRunner("Sign", {*x}, {x_sign}, {});
+      r_sign.Run(stream);
+
+      const auto& r_mul = NpuOpRunner("Mul", {x_sign, dy_share}, {*dx}, {});
+      r_mul.Run(stream);
+
+      const auto& r_sel =
+          NpuOpRunner("SelectV2", {t_cond, *dx, t_zero}, {*dx}, {});
+      r_sel.Run(stream);
+    } else {
+      Tensor x_abs;
+      x_abs.mutable_data<T>(xdim, place);
+      const auto& r_abs = NpuOpRunner("Abs", {*x}, {x_abs}, {});
+      r_abs.Run(stream);
+
+      Tensor x_sign;
+      x_sign.mutable_data<T>(xdim, place);
+      const auto& r_sign = NpuOpRunner("Sign", {*x}, {x_sign}, {});
+      r_sign.Run(stream);
+
+      Tensor y_pow;
+      y_pow.mutable_data<T>(ydim, place);
+      if (porder >= 1) {
+        const auto& r_pow1 = NpuOpRunner(
+            "Power", {x_abs}, {x_abs},
+            {{"power", (porder - 1)}, {"scale", 1.0f}, {"shift", 0.0f}});
+        r_pow1.Run(stream);
+
+        const auto& r_pow2 = NpuOpRunner(
+            "Power", {y_share}, {y_pow},
+            {{"power", (porder - 1)}, {"scale", 1.0f}, {"shift", 0.0f}});
+        r_pow2.Run(stream);
+
+        const auto& r_div = NpuOpRunner("DivNoNan", {x_abs, y_pow}, {*dx}, {});
+        r_div.Run(stream);
+      } else {
+        const auto& r_pow1 = NpuOpRunner(
+            "Power", {x_abs}, {x_abs},
+            {{"power", (1 - porder)}, {"scale", 1.0f}, {"shift", 0.0f}});
+        r_pow1.Run(stream);
+
+        const auto& r_pow2 = NpuOpRunner(
+            "Power", {y_share}, {y_pow},
+            {{"power", (1 - porder)}, {"scale", 1.0f}, {"shift", 0.0f}});
+        r_pow2.Run(stream);
+
+        const auto& r_div = NpuOpRunner("DivNoNan", {y_pow, x_abs}, {*dx}, {});
+        r_div.Run(stream);
+      }
+
+      const auto& r_mul1 = NpuOpRunner("Mul", {*dx, x_sign}, {*dx}, {});
+      r_mul1.Run(stream);
+
+      const auto& r_mul2 = NpuOpRunner("Mul", {*dx, dy_share}, {*dx}, {});
+      r_mul2.Run(stream);
+    }
+  }
+};
 
 }  // namespace operators
 }  // namespace paddle
 
@@ -90,3 +206,7 @@ namespace plat = paddle::platform;
 REGISTER_OP_NPU_KERNEL(
     p_norm, ops::PnormNPUKernel<plat::NPUDeviceContext, float>,
     ops::PnormNPUKernel<plat::NPUDeviceContext, plat::float16>);
+
+REGISTER_OP_NPU_KERNEL(
+    p_norm_grad, ops::PnormGradNPUKernel<plat::NPUDeviceContext, float>,
+    ops::PnormGradNPUKernel<plat::NPUDeviceContext, plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
index 9f990c0e29f6eb1a04f66c6ab83855d21c3ce549..3b75cba60b103fce118d2b0aca6eacf50fe9b809 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
@@ -27,7 +27,6 @@ paddle.enable_static()
 class TestPnormOp(OpTest):
     def set_npu(self):
         self.__class__.use_npu = True
-        self.__class__.no_need_check_grad = True
 
     def setUp(self):
         self.set_npu()
@@ -51,6 +50,12 @@ class TestPnormOp(OpTest):
         else:
             self.check_output_with_place(paddle.NPUPlace(0))
 
+    def test_check_grad(self):
+        if self.dtype == "float16":
+            return
+        self.check_grad_with_place(
+            paddle.NPUPlace(0), ['X'], 'Out', user_defined_grads=self.gradient)
+
     def init_test_case(self):
         self.shape = [2, 3, 4, 5]
         self.axis = 1
@@ -131,6 +136,16 @@ class TestPnormOp5(TestPnormOp3):
         self.init_dtype()
 
 
+class TestPnormOp6(TestPnormOp3):
+    def init_test_case(self):
+        self.shape = [2, 3, 4, 5]
+        self.axis = 1
+        self.epsilon = 1e-12
+        self.porder = 0.5
+        self.keepdim = False
+        self.init_dtype()
+
+
 class TestPnormOpfp16(TestPnormOp):
     def init_dtype(self):
         self.dtype = "float16"
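Note on the gradient being tested: for finite, nonzero porder the kernel above computes dx = dy * sign(x) * |x|^(porder-1) / ||x||_p^(porder-1), producing zeros where the norm is zero (via DivNoNan); porder == 0 yields a zero gradient, and porder == +/-INFINITY propagates dy * sign(x) only at the elements whose absolute value attains the norm. A minimal NumPy sketch of the finite-porder case follows; the helper name p_norm_grad_ref and the sample shapes are illustrative only, not part of this patch.

    import numpy as np

    def p_norm_grad_ref(x, dy, porder, axis):
        """Reference gradient of y = (sum(|x|**porder))**(1/porder) over `axis`."""
        y = np.power(
            np.sum(np.power(np.abs(x), porder), axis=axis, keepdims=True),
            1.0 / porder)
        # dx = dy * sign(x) * |x|**(p-1) / y**(p-1), with 0 where y == 0,
        # mirroring the DivNoNan behaviour of the NPU kernel.
        num = np.sign(x) * np.power(np.abs(x), porder - 1)
        den = np.power(y, porder - 1)
        dx = np.divide(num, den, out=np.zeros_like(num), where=den != 0)
        return dx * dy

    # Example usage; dy is kept broadcastable over the reduced axis.
    x = np.random.randn(2, 3, 4, 5).astype(np.float64)
    dy = np.ones((2, 1, 4, 5), dtype=np.float64)
    dx = p_norm_grad_ref(x, dy, porder=2.0, axis=1)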