diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 91fbfba382447d0c7019efb00ad04ea67e743df8..2100264823bb6b9e20b15389e044c6c6c9cd6fb9 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -619,6 +619,28 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
   }
 };
 
+class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    if (ctx->HasOutput("DX")) {
+      ctx->ShareDim("X", "DX");
+      ctx->ShareLoD("X", "DX");
+    }
+    if (ctx->HasOutput("DDOut")) {
+      ctx->ShareDim("X", "DDOut");
+      ctx->ShareLoD("X", "DDOut");
+    }
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return GetKernelType(ctx, *this, "DDX");
+  }
+};
+
 //
 // ReluGrad: dx = dy if y >= 0 else 0
 // ReluGradGrad: ddy = ddx if y >= 0 else 0
@@ -643,6 +665,29 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
   }
 };
 
+// leaky_relu Grad: dx = dy if x >= 0 else alpha * dy
+// leaky_relu GradGrad: ddy = ddx if x >= 0 else alpha * ddx
+class LeakyReluDoubleGradMaker
+    : public ::paddle::framework::SingleGradOpDescMaker {
+ public:
+  using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
+    auto* op = new ::paddle::framework::OpDesc();
+    op->SetType("leaky_relu_grad_grad");
+    // input1: X
+    op->SetInput("X", Input("X"));
+    // X@GRAD@GRAD: ddx
+    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
+    op->SetAttrMap(Attrs());
+    // outputs: X@GRAD (dx) and Out@GRAD@GRAD (ddy)
+    op->SetOutput("DX", InputGrad("X"));
+    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
+    return std::unique_ptr<::paddle::framework::OpDesc>(op);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -699,3 +744,23 @@ REGISTER_OP_CPU_KERNEL(
         ops::ReluGradGradFunctor<double>>,
     ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
                                     ops::ReluGradGradFunctor<plat::float16>>);
+
+REGISTER_OPERATOR(
+    leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
+    ops::ActivationOpInferVarType,
+    ops::ActivationGradOpDescMaker<ops::LeakyReluGradFunctor<float>::FwdDeps()>,
+    paddle::framework::SingleOpInplaceInToOut);
+REGISTER_OPERATOR(leaky_relu_grad, ops::ActivationOpGrad,
+                  paddle::framework::SingleOpInplaceInToOut,
+                  ops::LeakyReluDoubleGradMaker);
+REGISTER_OPERATOR(leaky_relu_grad_grad, ops::LeakyReluDoubleGrad);
+REGISTER_ACTIVATION_CPU_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
+                               LeakyReluGradFunctor);
+REGISTER_OP_CPU_KERNEL(
+    leaky_relu_grad_grad,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::LeakyReluGradGradFunctor<float>>,
+    ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
+                                    ops::LeakyReluGradGradFunctor<double>>,
+    ops::ActivationDoubleGradKernel<
+        plat::CPUDeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
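For review convenience, the math the new leaky_relu_grad_grad op implements: leaky_relu(x) = x for x >= 0 and alpha * x otherwise, so its first derivative is the piecewise constant 1-or-alpha and its second derivative is zero almost everywhere. A minimal NumPy sketch of what the double-grad kernel computes (illustrative only, not part of the patch; the function name is invented):

    import numpy as np

    def leaky_relu_grad_grad(x, ddx, alpha=0.02):
        # DDOut = DDX * leaky_relu'(x); DX is identically zero because
        # the second derivative of a piecewise-linear function vanishes.
        ddout = ddx * np.where(x >= 0, 1.0, alpha)
        dx = np.zeros_like(x)
        return dx, ddout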
diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu
index 20f3f3605805906424856cb345f962477f31dec2..377e5a4af75d56abb4676fa5396051ce8b152bdf 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.cu
@@ -33,6 +33,18 @@ namespace plat = paddle::platform;
 
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
 
+REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
+                                LeakyReluGradFunctor);
+
+REGISTER_OP_CUDA_KERNEL(
+    leaky_relu_grad_grad,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::LeakyReluGradGradFunctor<float>>,
+    ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
+                                    ops::LeakyReluGradGradFunctor<double>>,
+    ops::ActivationDoubleGradKernel<
+        plat::CUDADeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
+
 REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);
 
 REGISTER_OP_CUDA_KERNEL(
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 8259a392b2d44f70edd47d4132a74674856bf1a6..d306e20037a39d0170eb284dbd295f68d172e7c8 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1208,45 +1208,31 @@ inline void ExtractActivationDoubleGradTensor(
     const framework::Tensor** Out, const framework::Tensor** ddX,
     framework::Tensor** dX, framework::Tensor** dOut,
     framework::Tensor** ddOut) {
-  auto out_var = ctx.InputVar("Out");
   auto ddx_var = ctx.InputVar("DDX");
   auto ddo_var = ctx.OutputVar("DDOut");
-  auto do_var = ctx.OutputVar("DOut");
-  PADDLE_ENFORCE(out_var != nullptr,
-                 "Cannot get input Variable Out, variable name = %s",
-                 ctx.op().Input("Out"));
   PADDLE_ENFORCE(ddx_var != nullptr,
-                 "Cannot get input Variable %s, variable name = %s", "DDX",
+                 "Cannot get input Variable DDX, variable name = %s",
                  ctx.op().Input("DDX"));
   if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
-    *Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
     *ddX = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*ddx_var);
     if (ddo_var) {
       *ddOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
           ddo_var);
     }
-    if (do_var) {
-      *dOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
-          do_var);
-    }
   } else {
-    *Out = ctx.Input<framework::Tensor>("Out");
     *ddX = ctx.Input<framework::Tensor>("DDX");
     if (ddo_var) {
       *ddOut = ctx.Output<framework::Tensor>("DDOut");
     }
-    if (do_var) {
-      *dOut = ctx.Output<framework::Tensor>("DOut");
-    }
   }
   PADDLE_ENFORCE(*ddX != nullptr,
-                 "Cannot get output tensor %s, variable name = %s", "DDX",
+                 "Cannot get output tensor DDX, variable name = %s",
                  ctx.op().Output("DDX"));
 
   if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
     auto x_var = ctx.InputVar("X");
     PADDLE_ENFORCE(x_var != nullptr,
-                   "Cannot get input tensor X, variable name = %s",
+                   "Cannot get input Variable X, variable name = %s",
                    ctx.op().Input("X"));
     auto dx_var = ctx.OutputVar("DX");
     if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
@@ -1262,9 +1248,33 @@
       }
     }
   } else {
-    VLOG(10) << " Inplace activation of Op : " << ctx.op().Type();
+    VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
     *X = *ddX;
   }
+  if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) {
+    auto out_var = ctx.InputVar("Out");
+    PADDLE_ENFORCE(out_var != nullptr,
+                   "Cannot get input tensor Out, variable name = %s",
+                   ctx.op().Input("Out"));
+    auto dout_var = ctx.OutputVar("DOut");
+    if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
+      *Out =
+          paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
+      if (dout_var) {
+        *dOut =
+            paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
+                dout_var);
+      }
+    } else {
+      *Out = ctx.Input<framework::Tensor>("Out");
+      if (dout_var) {
+        *dOut = ctx.Output<framework::Tensor>("DOut");
+      }
+    }
+  } else {
+    VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
+    *Out = *ddX;
+  }
 }
 
 template <typename DeviceContext, typename Functor>
@@ -1318,6 +1328,36 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };
 
+template <typename T>
+struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
+  float alpha;
+  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
+    return {{"alpha", &alpha}};
+  }
+  template <typename Device>
+  void operator()(const Device& dev, const framework::Tensor* X,
+                  const framework::Tensor* Out, const framework::Tensor* ddX,
+                  framework::Tensor* ddOut, framework::Tensor* dOut,
+                  framework::Tensor* dX) const {
+    auto* d = dev.eigen_device();
+    auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
+    if (ddOut) {
+      auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
+      ddout.device(*d) = ddx *
+                         ((x >= static_cast<T>(0)).template cast<T>().eval() +
+                          static_cast<T>(alpha) *
+                              (x < static_cast<T>(0)).template cast<T>().eval())
+                             .template cast<T>();
+    }
+    if (dX) {
+      auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
+      dx.device(*d) = dx.constant(static_cast<T>(0));
+    }
+  }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -1349,7 +1389,6 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
   __macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor);          \
   __macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor);          \
   __macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor);                      \
-  __macro(leaky_relu, LeakyRelu, LeakyReluFunctor, LeakyReluGradFunctor);     \
   __macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
   __macro(elu, ELU, ELUFunctor, ELUGradFunctor);                              \
   __macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
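The reworked ExtractActivationDoubleGradTensor keys off each functor's FwdDeps(): a double-grad functor declares whether it needs the forward input X (kDepX, as LeakyReluGradGradFunctor does, since it tests the sign of X) or the forward output Out (kDepOut, as ReluGradGradFunctor does). Only the declared tensor is fetched; the other pointer is aliased to DDX so inplace execution stays valid. A rough Python analogue of that dispatch (illustrative only; all names here are invented):

    KDEP_X, KDEP_OUT = 1, 2

    def extract_double_grad_tensors(ctx, fwd_deps):
        ddx = ctx["DDX"]                                  # always required
        x = ctx["X"] if fwd_deps & KDEP_X else ddx        # e.g. leaky_relu
        out = ctx["Out"] if fwd_deps & KDEP_OUT else ddx  # e.g. relu
        return x, out, ddx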
diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py
index c4f26386c92e11b1486fdc03f1fab0c16528014d..e2d540fea558a997eb0570dee79a91881f4dac0c 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -68,5 +68,31 @@ class TestReluDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestLeakyReluDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # The shape of the input variable should be clearly specified, not include -1.
+        shape = [3, 7]
+        eps = 0.005
+        alpha = 0.2
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        x.persistable = True
+        y = layers.leaky_relu(x, alpha=alpha)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        x_arr[np.abs(x_arr) < 0.005] = 0.02  # keep samples off the kink at x = 0
+
+        gradient_checker.double_grad_check(
+            [x], y, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == "__main__":
     unittest.main()
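The test delegates to gradient_checker.double_grad_check, which compares the analytic grad-of-grad program against finite differences; the x_arr adjustment keeps every sample at least 0.005 away from the kink at zero, where leaky_relu is not differentiable. The underlying property, a second derivative that vanishes away from the kink, can also be confirmed standalone with plain NumPy (a sketch, not part of the PR's test suite):

    import numpy as np

    def second_derivative(f, x, eps=1e-4):
        # Central finite difference for f''(x).
        return (f(x + eps) - 2.0 * f(x) + f(x - eps)) / eps**2

    alpha = 0.2
    leaky_relu = lambda x: np.where(x >= 0, x, alpha * x)
    x = np.array([-0.8, -0.1, 0.3, 1.5])  # samples well away from the kink
    assert np.allclose(second_derivative(leaky_relu, x), 0.0, atol=1e-6)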