From 5634d2caff672af909abefd2a2ee73918e4bb9c1 Mon Sep 17 00:00:00 2001
From: Zhong Hui
Date: Sun, 27 Sep 2020 20:37:01 +0800
Subject: [PATCH] [cherry pick] double grad support for abs

double grad support for abs
---
 paddle/fluid/operators/activation_op.cc      | 42 +++++++++++++++++--
 paddle/fluid/operators/activation_op.cu      | 14 ++++++-
 paddle/fluid/operators/activation_op.h       | 21 ++++++++++
 .../unittests/test_activation_nn_grad.py     | 24 +++++++++++
 4 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 4587b494b3..ce53209bc3 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -711,10 +711,28 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
   }
 };
 
-//
+// AbsGrad: dx=dy if x >=0 else -dy
+// AbsDoubleGrad: ddy = ddx if x >=0 else -ddx
+template <typename T>
+class AbsDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
+ public:
+  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+ protected:
+  void Apply(GradOpPtr<T> op) const override {
+    op->SetType("abs_grad_grad");
+    // input1: x
+    op->SetInput("X", this->Input("X"));
+    // input2: ddx
+    op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
+    op->SetAttrMap(this->Attrs());
+    // output: ddy
+    op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
+  }
+};
+
 // ReluGrad: dx = dy if y >= 0 else 0
 // ReluGradGrad: ddy = ddx if y >= 0 else 0
-//
 template <typename T>
 class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
  public:
@@ -1162,7 +1180,13 @@ REGISTER_OPERATOR(
     std::conditional<ops::CanInplaceAct<ops::AbsGradFunctor<float>>(),
                      ops::ActFwdInplaceInferer, void>::type);
 REGISTER_OPERATOR(abs_grad, ops::ActivationOpGrad,
-                  ops::ActivationGradOpInplaceInference);
+                  ops::ActivationGradOpInplaceInference,
+                  ops::AbsDoubleGradMaker<paddle::framework::OpDesc>,
+                  ops::AbsDoubleGradMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(
+    abs_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::AbsGradGradFunctor<float>::FwdDeps()>,
+    ops::ActivationDoubleGradOpInplaceInference);
 
 REGISTER_OP_CPU_KERNEL(abs,
                        ops::ActivationKernel<paddle::platform::CPUDeviceContext,
@@ -1182,4 +1206,16 @@ REGISTER_OP_CPU_KERNEL(
                               ops::AbsGradFunctor<int>>,
     ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
                               ops::AbsGradFunctor<int64_t>>);
+REGISTER_OP_CPU_KERNEL(
+    abs_grad_grad,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::AbsGradGradFunctor<float>>,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::AbsGradGradFunctor<double>>,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::AbsGradGradFunctor<plat::float16>>,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::AbsGradGradFunctor<int>>,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::AbsGradGradFunctor<int64_t>>);
 /* ========================================================================== */
diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu
index 48ec90471f..072d952d26 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.cu
@@ -160,7 +160,7 @@ REGISTER_OP_CUDA_KERNEL(
                               ops::ExpGradFunctor<plat::float16>>);
 /* ========================================================================== */
 
-/* ==========================  exp register  ============================ */
+/* ==========================  abs register  ============================ */
 REGISTER_OP_CUDA_KERNEL(
     abs, ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<float>>,
     ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<double>>,
@@ -180,4 +180,16 @@ REGISTER_OP_CUDA_KERNEL(
                               ops::AbsGradFunctor<int64_t>>,
     ops::ActivationGradKernel<plat::CUDADeviceContext,
                               ops::AbsGradFunctor<plat::float16>>);
+REGISTER_OP_CUDA_KERNEL(
+    abs_grad_grad,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::AbsGradGradFunctor<float>>,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::AbsGradGradFunctor<double>>,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::AbsGradGradFunctor<plat::float16>>,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::AbsGradGradFunctor<int>>,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::AbsGradGradFunctor<int64_t>>);
 /* ========================================================================== */
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index ec3c39097a..3818a37e93 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1318,6 +1318,27 @@ class ActivationDoubleGradKernel
   }
 };
 
+template <typename T>
+struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device>
+  void operator()(const Device& dev, const framework::Tensor* X,
+                  const framework::Tensor* Out, const framework::Tensor* ddX,
+                  framework::Tensor* ddOut, framework::Tensor* dOut,
+                  framework::Tensor* dX) const {
+    auto* d = dev.eigen_device();
+    auto ddx = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(ddX, "Input", "DDX", "AbsGradGrad"));
+    auto x = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(X, "Input", "X", "AbsGradGrad"));
+    if (ddOut) {
+      auto ddout = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "AbsGradGrad"));
+      ddout.device(*d) = ddx * x.sign();
+    }
+  }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+};
+
 template <typename T>
 struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
   template <typename Device>
diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
index db9e8d2c6b..e8b8a45fb6 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -147,5 +147,29 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestAbsDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of input variable should be clearly specified, not include -1.
+        shape = [2, 3, 7, 9]
+        eps = 1e-6
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        x.persistable = True
+        y = layers.abs(x)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x], y, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab
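
For reference (not part of the patch): the rule the new AbsGradGradFunctor encodes is ddout = ddx * sign(x), the same sign rule AbsGrad applies to dy. A minimal NumPy sketch with purely illustrative values:

import numpy as np

x = np.array([-2.0, -0.5, 0.3, 1.7])    # forward input X
ddx = np.array([0.1, -0.2, 0.4, -0.8])  # DDX: gradient flowing into abs_grad's dx
ddout = ddx * np.sign(x)                # DDOut produced by abs_grad_grad

print(ddout)  # [-0.1  0.2  0.4 -0.8]: ddx with its sign flipped wherever x < 0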