From 11d3a38f25ce80c2d6d3d80f73e97799da75eead Mon Sep 17 00:00:00 2001
From: Kaipeng Deng
Date: Mon, 13 May 2019 13:00:10 +0800
Subject: [PATCH] add double grad for square op (#17173)

* add double grad for square. test=develop

* format code. test=develop

* fix for grad sum. test=develop

* refine shape. test=develop

* refine extract. test=develop
---
 paddle/fluid/operators/activation_op.cc      | 113 +++++++++++++-----
 paddle/fluid/operators/activation_op.cu      |  18 +++
 paddle/fluid/operators/activation_op.h       |  86 ++++++++++++-
 .../fluid/tests/unittests/test_nn_grad.py    |  24 ++++
 4 files changed, 210 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index f93474a12..b20d9c465 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -597,40 +597,31 @@ REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
 REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
 REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
 
+template <ActBwdOpFwdDeps kDepValue>
 class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->HasOutput("DOut")) {
-      ctx->ShareDim("Out", "DOut");
-      ctx->ShareLoD("Out", "DOut");
+    if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
+      if (ctx->HasOutput("DX")) {
+        ctx->ShareDim("X", "DX");
+        ctx->ShareLoD("X", "DX");
+      }
+      if (ctx->HasOutput("DDOut")) {
+        ctx->ShareDim("X", "DDOut");
+        ctx->ShareLoD("X", "DDOut");
+      }
     }
-    if (ctx->HasOutput("DDOut")) {
-      ctx->ShareDim("Out", "DDOut");
-      ctx->ShareLoD("Out", "DDOut");
-    }
-  }
-
- protected:
-  framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return GetKernelType(ctx, *this, "Out");
-  }
-};
-
-class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->HasOutput("DX")) {
-      ctx->ShareDim("X", "DX");
-      ctx->ShareLoD("X", "DX");
-    }
-    if (ctx->HasOutput("DDOut")) {
-      ctx->ShareDim("X", "DDOut");
-      ctx->ShareLoD("X", "DDOut");
+    if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) {
+      if (ctx->HasOutput("DOut")) {
+        ctx->ShareDim("Out", "DOut");
+        ctx->ShareLoD("Out", "DOut");
+      }
+      if (ctx->HasOutput("DDOut")) {
+        ctx->ShareDim("Out", "DDOut");
+        ctx->ShareLoD("Out", "DDOut");
+      }
     }
   }
 
@@ -690,6 +681,33 @@ class LeakyReluDoubleGradMaker
   }
 };
 
+// square Grad: dx=2x*dy
+// square GradGrad: ddy=2x*ddx, dx=2dy*ddx
+class SquareDoubleGradMaker
+    : public ::paddle::framework::SingleGradOpDescMaker {
+ public:
+  using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
+    auto* op = new ::paddle::framework::OpDesc();
+    op->SetType("square_grad_grad");
+    op->SetInput("X", Input("X"));
+    // Out@GRAD: dy
+    op->SetInput("DOut", Input(framework::GradVarName("Out")));
+    // X@GRAD@GRAD: ddx
+    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
+
+    op->SetAttrMap(Attrs());
+
+    // X@GRAD: dx
+    op->SetOutput("DX", InputGrad("X"));
+    // Out@GRAD@GRAD: ddy
+    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
+    return std::unique_ptr<::paddle::framework::OpDesc>(op);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -727,6 +745,7 @@ namespace plat = paddle::platform;
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
 
+/* ========================== relu register ============================= */
 REGISTER_OPERATOR(
     relu, ops::ActivationOp, ops::ReluOpMaker, ops::ActivationOpInferVarType,
     ops::ActivationGradOpDescMaker<ops::ReluGradFunctor<float>::FwdDeps()>,
@@ -734,7 +753,9 @@ REGISTER_OPERATOR(
 REGISTER_OPERATOR(relu_grad, ops::ActivationOpGrad,
                   paddle::framework::SingleOpInplaceInToOut,
                   ops::ReluDoubleGradMaker);
-REGISTER_OPERATOR(relu_grad_grad, ops::ActivationOpDoubleGrad);
+REGISTER_OPERATOR(
+    relu_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::ReluGradGradFunctor<float>::FwdDeps()>);
 
 REGISTER_ACTIVATION_CPU_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
 
@@ -746,7 +767,9 @@ REGISTER_OP_CPU_KERNEL(
                                     ops::ReluGradGradFunctor<double>>,
     ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
                                     ops::ReluGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
 
+/* ======================== leaky relu register ============================ */
 REGISTER_OPERATOR(
     leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
     ops::ActivationOpInferVarType,
@@ -755,7 +778,10 @@ REGISTER_OPERATOR(
 REGISTER_OPERATOR(leaky_relu_grad, ops::ActivationOpGrad,
                   paddle::framework::SingleOpInplaceInToOut,
                   ops::LeakyReluDoubleGradMaker);
-REGISTER_OPERATOR(leaky_relu_grad_grad, ops::LeakyReluDoubleGrad);
+REGISTER_OPERATOR(
+    leaky_relu_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::LeakyReluGradGradFunctor<float>::FwdDeps()>);
+
 REGISTER_ACTIVATION_CPU_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
                                LeakyReluGradFunctor);
 REGISTER_OP_CPU_KERNEL(
@@ -766,3 +792,30 @@ REGISTER_OP_CPU_KERNEL(
                                     ops::LeakyReluGradGradFunctor<double>>,
     ops::ActivationDoubleGradKernel<
         plat::CPUDeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
+
+/* ========================== square register ============================ */
+REGISTER_OPERATOR(
+    square, ops::ActivationOp, ops::SquareOpMaker,
+    ops::ActivationOpInferVarType,
+    ops::ActivationGradOpDescMaker<ops::SquareGradFunctor<float>::FwdDeps()>,
+    paddle::framework::SingleOpInplaceInToOut);
+REGISTER_OPERATOR(square_grad, ops::ActivationOpGrad,
+                  paddle::framework::SingleOpInplaceInToOut,
+                  ops::SquareDoubleGradMaker);
+REGISTER_OPERATOR(
+    square_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::SquareGradGradFunctor<float>::FwdDeps()>);
+
+REGISTER_ACTIVATION_CPU_KERNEL(square, Square, SquareFunctor,
+                               SquareGradFunctor);
+
+REGISTER_OP_CPU_KERNEL(
+    square_grad_grad,
+    ops::SquareDoubleGradKernel<plat::CPUDeviceContext,
+                                ops::SquareGradGradFunctor<float>>,
+    ops::SquareDoubleGradKernel<plat::CPUDeviceContext,
+                                ops::SquareGradGradFunctor<double>>,
+    ops::SquareDoubleGradKernel<plat::CPUDeviceContext,
+                                ops::SquareGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu
index 377e5a4af..63c4f0a88 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.cu
@@ -33,6 +33,7 @@ namespace plat = paddle::platform;
 
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
 
+/* ======================== leaky relu register ============================ */
 REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
                                 LeakyReluGradFunctor);
 
@@ -44,7 +45,9 @@ REGISTER_OP_CUDA_KERNEL(
                                     ops::LeakyReluGradGradFunctor<double>>,
     ops::ActivationDoubleGradKernel<
         plat::CUDADeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
 
+/* =========================== relu register ============================ */
 REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
 
 REGISTER_OP_CUDA_KERNEL(
@@ -55,3 +58,18 @@
                                     ops::ReluGradGradFunctor<double>>,
     ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                     ops::ReluGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
+
+/* =========================== square register ============================ */
+REGISTER_ACTIVATION_CUDA_KERNEL(square, Square, SquareFunctor,
+                                SquareGradFunctor);
+
+REGISTER_OP_CUDA_KERNEL(
+    square_grad_grad,
+    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
+                                ops::SquareGradGradFunctor<float>>,
+    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
+                                ops::SquareGradGradFunctor<double>>,
+    ops::SquareDoubleGradKernel<plat::CUDADeviceContext,
+                                ops::SquareGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 5848d9dad..f2eee754b 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1,4 +1,5 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -1358,6 +1359,90 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
 };
 
+template <typename T>
+struct SquareGradGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device>
+  void operator()(const Device& dev, const framework::Tensor* X,
+                  const framework::Tensor* ddX, framework::Tensor* ddOut,
+                  const framework::Tensor* dOut, framework::Tensor* dX) const {
+    auto* d = dev.eigen_device();
+    auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
+    if (ddOut) {
+      auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
+      ddout.device(*d) = ddx * static_cast<T>(2) * x;
+    }
+    if (dX) {
+      auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
+      auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
+      dx.device(*d) = ddx * static_cast<T>(2) * dout;
+    }
+  }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+};
+
+// TODO(dengkaipeng): double gradient calculation for Square/Sqrt needs
+// DOut(dy) as an input (not an output), so tensor extraction is different
+// from the other activations. Implement the extraction kernel separately here.
+inline void ExtractDoubleGradTensorWithInputDOut(
+    const framework::ExecutionContext& ctx, const framework::Tensor** X,
+    const framework::Tensor** ddX, framework::Tensor** dX,
+    const framework::Tensor** dOut, framework::Tensor** ddOut) {
+  // extract ddX (input) and ddOut (output)
+  auto ddx_var = ctx.InputVar("DDX");
+  auto ddo_var = ctx.OutputVar("DDOut");
+  PADDLE_ENFORCE(ddx_var != nullptr,
+                 "Cannot get input Variable DDX, variable name = %s",
+                 ctx.op().Input("DDX"));
+  *ddX = ctx.Input<framework::Tensor>("DDX");
+  if (ddo_var) {
+    *ddOut = ctx.Output<framework::Tensor>("DDOut");
+  }
+  PADDLE_ENFORCE(*ddX != nullptr,
+                 "Cannot get input tensor DDX, variable name = %s",
+                 ctx.op().Input("DDX"));
+
+  // extract x (input) and dx (output)
+  auto x_var = ctx.InputVar("X");
+  PADDLE_ENFORCE(x_var != nullptr,
+                 "Cannot get input Variable X, variable name = %s",
+                 ctx.op().Input("X"));
+  auto dx_var = ctx.OutputVar("DX");
+  *X = ctx.Input<framework::Tensor>("X");
+  if (dx_var) {
+    *dX = ctx.Output<framework::Tensor>("DX");
+  }
+
+  // extract dOut (input)
+  auto dout_var = ctx.InputVar("DOut");
+  if (dout_var) {
+    *dOut = ctx.Input<framework::Tensor>("DOut");
+  }
+}
+
+template <typename DeviceContext, typename Functor>
+class SquareDoubleGradKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const framework::Tensor *X, *ddX, *dOut;
+    X = ddX = dOut = nullptr;
+    framework::Tensor *dX, *ddOut;
+    dX = ddOut = nullptr;
+
+    ExtractDoubleGradTensorWithInputDOut(ctx, &X, &ddX, &dX, &dOut, &ddOut);
+
+    dX->mutable_data<T>(X->dims(), ctx.GetPlace());
+    ddOut->mutable_data<T>(ctx.GetPlace());
+
+    auto& place = ctx.template device_context<DeviceContext>();
+
+    Functor functor;
+    functor(place, X, ddX, ddOut, dOut, dX);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -1381,7 +1466,6 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
   __macro(round, Round, RoundFunctor, ZeroGradFunctor);                      \
   __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
   __macro(log, Log, LogFunctor, LogGradFunctor);                             \
-  __macro(square, Square, SquareFunctor, SquareGradFunctor);                 \
   __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor);                     \
   __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor);        \
   __macro(pow, Pow, PowFunctor, PowGradFunctor);                             \
diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py
index df0d8e034..4b6b43b71 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -115,5 +115,29 @@ class TestConvDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestSquareDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified, not include -1.
+        shape = [17, 23]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        x.persistable = True
+        y = layers.square(x)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x], y, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == "__main__":
     unittest.main()
--
GitLab
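
Note on the formulas encoded in the SquareDoubleGradMaker comment ("square Grad: dx=2x*dy; square GradGrad: ddy=2x*ddx, dx=2dy*ddx") and in the two branches of SquareGradGradFunctor. The derivation below is a sketch added for reference only; it is not part of the patch:

\[
y = x^{2} \quad\Longrightarrow\quad dx \;=\; \frac{\partial y}{\partial x}\, dy \;=\; 2x\, dy .
\]

Treating the first-order gradient \(dx = 2x\,dy\) as a function of \(x\) and \(dy\), and writing \(ddx\) (the DDX input) for the gradient flowing back into \(dx\), differentiating once more gives

\[
ddy \;=\; \frac{\partial (dx)}{\partial (dy)}\, ddx \;=\; 2x\, ddx \quad (\text{DDOut}),
\qquad
dx_{\text{new}} \;=\; \frac{\partial (dx)}{\partial x}\, ddx \;=\; 2\, dy\, ddx \quad (\text{DX}),
\]

which matches what SquareGradGradFunctor computes: ddout = ddx * 2 * x when DDOut is requested, and dx = ddx * 2 * dout when DX is requested.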