From 977e9fcb274f9497a193baf59303f4a2024f1791 Mon Sep 17 00:00:00 2001
From: lvmengsi
Date: Sat, 18 May 2019 21:27:25 +0800
Subject: [PATCH] support elementwise_sub double backward (#17476)

add elementwise_sub_grad_grad op for backward of backward calculation
---
 .../elementwise/elementwise_sub_op.cc         | 50 ++++++++++++++-
 .../elementwise/elementwise_sub_op.cu         | 10 +++
 .../elementwise/elementwise_sub_op.h          | 28 +++++++++
 .../fluid/tests/unittests/test_nn_grad.py     | 62 +++++++++++++++++--
 4 files changed, 145 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
index 04c87c1b2..b1ec10ea8 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
@@ -13,10 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseSubDoubleGradDescMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_sub_grad_grad");
+    op->SetInput("Y", Input("Y"));
+    op->SetInput("DOut", Input(framework::GradVarName("Out")));
+    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
+    op->SetInput("DDY", OutputGrad(framework::GradVarName("Y")));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
 REGISTER_ELEMWISE_GRAD_MAKER(elementwise_sub, Sub);
-REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_sub, "Sub", "Out = X - Y");
+REGISTER_ELEMWISE_EXPLICIT_OP_WITHOUT_GRAD(elementwise_sub, "Sub",
+                                           "Out = X - Y");
+
+REGISTER_OPERATOR(elementwise_sub_grad, ops::ElementwiseOpExplicitGrad,
+                  ops::ElementwiseGradOpInplace,
+                  ops::ElementwiseGradNoBufVarsInference,
+                  ops::ElementwiseSubDoubleGradDescMaker);
+REGISTER_OPERATOR(elementwise_sub_grad_grad,
+                  ops::ElementwiseOpDoubleGradWithoutDXDY);
 
 REGISTER_OP_CPU_KERNEL(
     elementwise_sub,
@@ -30,3 +68,13 @@ REGISTER_OP_CPU_KERNEL(
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_sub_grad_grad,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        float>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        double>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
index f2adf1c83..52fad7fd0 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
@@ -33,3 +33,13 @@ REGISTER_OP_CUDA_KERNEL(
     ops::ElementwiseSubGradKernel<plat::CUDADeviceContext, int>,
     ops::ElementwiseSubGradKernel<plat::CUDADeviceContext, int64_t>,
     ops::ElementwiseSubGradKernel<plat::CUDADeviceContext, plat::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_sub_grad_grad,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        float>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        double>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        int>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
index 770323fe5..5049d587b 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
@@ -68,5 +68,33 @@ class ElementwiseSubGradKernel : public ElemwiseGradKernel<T> {
         ctx, *x, *y, *out, *dout, axis, dx, dy, SubGradDX<T>(), SubGradDY<T>());
   }
 };
+
+template <typename DeviceContext, typename T>
+class ElementwiseSubDoubleGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    using Tensor = framework::Tensor;
+
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* dout = ctx.Input<Tensor>("DOut");
+    auto* ddx = ctx.Input<Tensor>("DDX");
+    auto* ddy = ctx.Input<Tensor>("DDY");
+
+    auto* ddout = ctx.Output<Tensor>("DDOut");
+
+    // DDOut = ddx - ddy
+    if (ddout) {
+      Tensor ddx_safe, ddy_safe;
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, dout, ddx, &ddx_safe);
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, y, ddy, &ddy_safe);
+
+      ddout->mutable_data<T>(ctx.GetPlace());
+      int axis = ctx.Attr<int>("axis");
+      ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
+          ctx, &ddx_safe, &ddy_safe, axis, SubFunctor<T>(), ddout);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py
index 5710c93b7..b8c9d73e4 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -169,7 +169,7 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -219,7 +219,7 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -246,7 +246,7 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
        eps = 0.005
         dtype = np.float64
 
@@ -273,7 +273,7 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -296,6 +296,60 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of input variable shoule be clearly specified, not inlcude -1.
+        shape = [2, 3, 5, 7]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape, False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_sub(x, y)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of input variable shoule be clearly specified, not inlcude -1.
+        shape = [2, 3, 5, 7]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape[:-1], False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_sub(x, y, axis=0)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 class TestMulDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
-- 
GitLab
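
For context on what the new op computes: with Out = X - Y, the first-order gradients are dX = dOut and dY = -dOut, neither of which depends on X or Y. Differentiating those gradients again therefore yields only DDOut = DDX - DDY, which is why elementwise_sub_grad_grad produces a single DDOut output and is registered with ElementwiseOpDoubleGradWithoutDXDY. The NumPy sketch below (not part of the patch) mirrors that relation; it assumes GetDoubleGradSafeTensor falls back to a zero tensor when DDX or DDY is not fed, and the helper name is illustrative rather than a Paddle API.

import numpy as np


def sub_double_grad_reference(ddx, ddy, dout_shape, y_shape):
    # Reference for DDOut = DDX - DDY as computed by
    # ElementwiseSubDoubleGradKernel. Assumption: a missing DDX/DDY is
    # treated as a zero tensor shaped like DOut / Y respectively.
    ddx_safe = ddx if ddx is not None else np.zeros(dout_shape)
    ddy_safe = ddy if ddy is not None else np.zeros(y_shape)
    return ddx_safe - ddy_safe


shape = (2, 3, 5, 7)
ddx = np.random.uniform(-1.0, 1.0, shape)
ddy = np.random.uniform(-1.0, 1.0, shape)

# Same-shape case exercised by TestElementwiseSubDoubleGradCheck.
assert np.allclose(sub_double_grad_reference(ddx, ddy, shape, shape), ddx - ddy)
# With no DDX fed, the result degrades to -DDY.
assert np.allclose(sub_double_grad_reference(None, ddy, shape, shape), -ddy)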