diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 5a3660cee85762f3d76129dfb694eeb6d87bb52c..95214484dca99e718f8ec62225ce40ce3ffd7323 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -763,10 +763,28 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel { } }; -// +// AbsGrad: dx=dy if x >=0 else -dy +// AbsDoubleGrad: ddy = ddx if x >=0 else -ddx +template +class AbsDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { + public: + using ::paddle::framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("abs_grad_grad"); + // input1: x + op->SetInput("X", this->Input("X")); + // input2: ddx + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetAttrMap(this->Attrs()); + // output: ddy + op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out"))); + } +}; + // ReluGrad: dx = dy if y >= 0 else 0 // ReluGradGrad: ddy = ddx if y >= 0 else 0 -// template class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { public: @@ -1214,7 +1232,13 @@ REGISTER_OPERATOR( std::conditional>(), ops::ActFwdInplaceInferer, void>::type); REGISTER_OPERATOR(abs_grad, ops::ActivationOpGrad, - ops::ActivationGradOpInplaceInferer); + ops::ActivationGradOpInplaceInferer, + ops::AbsDoubleGradMaker, + ops::AbsDoubleGradMaker); +REGISTER_OPERATOR( + abs_grad_grad, + ops::ActivationOpDoubleGrad::FwdDeps()>, + ops::ActivationDoubleGradOpInplaceInferer); REGISTER_OP_CPU_KERNEL(abs, ops::ActivationKernel>, ops::ActivationGradKernel>); +REGISTER_OP_CPU_KERNEL( + abs_grad_grad, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>); /* ========================================================================== */ /* ========================== register checkpoint ===========================*/ diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu index 48ec90471f0becf921e7e68eb8722544885aaa7a..072d952d2618d2c9dbbe27641dcd951a634753a0 100644 --- a/paddle/fluid/operators/activation_op.cu +++ b/paddle/fluid/operators/activation_op.cu @@ -160,7 +160,7 @@ REGISTER_OP_CUDA_KERNEL( ops::ExpGradFunctor>); /* ========================================================================== */ -/* ========================== exp register ============================ */ +/* ========================== abs register ============================ */ REGISTER_OP_CUDA_KERNEL( abs, ops::ActivationKernel>, @@ -180,4 +180,16 @@ REGISTER_OP_CUDA_KERNEL( ops::AbsGradFunctor>, ops::ActivationGradKernel>); +REGISTER_OP_CUDA_KERNEL( + abs_grad_grad, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>); /* ========================================================================== */ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 00a7c063c9155488d117332d5ef3541d16d76bdb..646f546bffb2ced3830c119b5f24f6d3fcad0e78 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -1430,6 +1430,27 @@ class ActivationDoubleGradKernel } }; +template +struct AbsGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* X, + const framework::Tensor* Out, const framework::Tensor* ddX, + framework::Tensor* ddOut, framework::Tensor* dOut, + framework::Tensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "AbsGradGrad")); + auto x = framework::EigenVector::Flatten( + GET_DATA_SAFELY(X, "Input", "X", "AbsGradGrad")); + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "AbsGradGrad")); + ddout.device(*d) = ddx * x.sign(); + } + } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + template struct ReluGradGradFunctor : public BaseActivationFunctor { template diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index db9e8d2c6bda011bef7c23e7fb51e246137a3906..e8b8a45fb677568947be82a1c77e6f2e7a17cdc1 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -147,5 +147,29 @@ class TestSquareDoubleGradCheck(unittest.TestCase): self.func(p) +class TestAbsDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + shape = [2, 3, 7, 9] + eps = 1e-6 + dtype = np.float64 + + x = layers.data('x', shape, False, dtype) + x.persistable = True + y = layers.abs(x) + x_arr = np.random.uniform(-1, 1, shape).astype(dtype) + + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": unittest.main()